In [None]:
# Data Exploration and Cleaning
# Load the dataset into a Pandas DataFrame and display the first 5 rows.
# Check the shape, column names, and summary statistics of the dataset.
# Identify and handle missing values (fill or drop based on the data type).
# Convert Transaction_Date into datetime format and extract year, month, and day as new columns.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot appearance for the notebook.
# The seaborn theme is applied first; the default figure size is set after it,
# in the same order as before.
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': (10, 6)})  # default size for every figure

In [None]:
# Read the transactions CSV into a DataFrame, then preview its first 5 rows.
data = pd.read_csv(filepath_or_buffer='credit_card_transactions.csv')
print(data.head(n=5))

In [None]:
# Dataset overview: dimensions, column labels, and descriptive statistics.
# Each (label, value) pair is printed exactly as a two-argument print() call.
for label, value in (
    ("Shape of the dataset:", data.shape),
    ("Column names:", data.columns.to_list()),
    ("Summary statistics:\n", data.describe()),
):
    print(label, value)

In [None]:
# Identify missing values: count the null entries in every column.
# NOTE: this cell only reports the counts; it does not fill or drop anything.
missing_values = data.isna().sum()
print("Missing values in each column:\n", missing_values)

In [None]:
# Inspect the dtype pandas currently holds for each column.
column_dtypes = data.dtypes
print("Data types of each column:\n", column_dtypes)

In [None]:
# Parse Transaction_Date with pd.to_datetime and overwrite the column with the result.
data['Transaction_Date'] = data['Transaction_Date'].pipe(pd.to_datetime)

In [None]:
# Print the per-column dtypes again so the current type of every column is visible.
column_dtypes = data.dtypes
print("Data types of each column:\n", column_dtypes)

In [None]:
# Display the DataFrame's current column labels (shown as the cell's output).
data.columns

In [None]:
# Derive Year, Month and Day columns from Transaction_Date via the .dt accessor.
txn_dt = data['Transaction_Date'].dt
data['Year'] = txn_dt.year
data['Month'] = txn_dt.month
data['Day'] = txn_dt.day

In [None]:
# Preview Transaction_Date next to the derived Year/Month/Day columns (first 5 rows).
date_cols = ['Transaction_Date', 'Year', 'Month', 'Day']
print(data[date_cols].head())

In [None]:
# Retrieve all transactions made in January 2025, using the Year and Month columns.
jan_2025 = data[(data['Year'] == 2025) & (data['Month'] == 1)]

# Export the January 2025 subset to CSV without the index column.
jan_2025.to_csv('january_2025_transactions.csv', index=False)

# The preview goes last: a notebook cell only displays its final expression,
# so a bare head() in the middle of the cell is silently discarded.
jan_2025.head()

In [26]:
# Find transactions where Amount > 700 and Transaction_Type is "Online".
is_large = data['Amount'].gt(700)
is_online = data['Transaction_Type'].eq("Online")
online_transactions = data[is_large & is_online]

In [None]:
# Select only the Approved transactions and report how many there are.
# The filter is computed once; the second, identical assignment was removed.
approved_transactions = data[data['Transaction_Status'] == 'Approved']
print("Number of Approved transactions:", len(approved_transactions))

Number of Approved transactions: 399


In [47]:
# Create Discounted_Amount: a 5% discount on transactions above 500;
# every other row keeps its original Amount.
mask = data['Amount'] > 500  # True for rows that receive the discount

# where() keeps Amount on rows where the condition (~mask) holds and takes
# 95% of Amount on the remaining (discounted) rows.
data['Discounted_Amount'] = data['Amount'].where(~mask, data['Amount'] * 0.95)

print(data[['Amount', 'Discounted_Amount']].head())



   Amount  Discounted_Amount
0  360.10           360.1000
1  357.02           357.0200
2  829.41           787.9395
3  790.35           750.8325
4  311.26           311.2600
