In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
!pip3 install wordcloud -q
from wordcloud import WordCloud

In [None]:
# Load the pipe-separated transaction export into a DataFrame
df = pd.read_csv('output.csv', sep='|')

In [None]:
# Preview the first rows of the freshly loaded frame
df.head()

In [None]:
# Show the dtype of every column before any explicit conversion
df.dtypes

In [None]:
# Report the number of distinct (non-null) values in each column
for col, distinct_count in df.nunique().items():
    print(f"{col}: {distinct_count}")

In [None]:
# Convert each column to its working dtype

# Label-like text columns become categoricals
for col in ('Activity', 'Recipient', 'Account', 'Status'):
    df[col] = df[col].astype('category')

# Amounts that cannot be parsed as numbers become NaN (errors='coerce')
df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')

# Dates are parsed strictly as YYYY-MM-DD
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

# Times are parsed as HH:MM:SS and reduced to plain datetime.time objects
parsed_times = pd.to_datetime(df['Time'], format='%H:%M:%S')
df['Time'] = parsed_times.dt.time

In [None]:
# Remove non-completed transactions and 'Used' activity.
# .copy() makes the filtered result an independent frame, so the column
# assignments below write to it directly instead of to a slice of the
# unfiltered data (which raises SettingWithCopyWarning).
is_relevant = (df['Status'] == 'Completed') & (df['Activity'] != 'Used')
df = df.loc[is_relevant].copy()

# Create a signed transaction amount: 'Paid' and 'Sent' rows are negated,
# every other activity keeps the sign of 'Amount'
df['TransactionAmount'] = np.where(df['Activity'].isin(['Paid', 'Sent']), -df['Amount'], df['Amount'])

# Combine Date and Time columns into a single timestamp
df['DateTime'] = pd.to_datetime(df['Date'].astype(str) + ' ' + df['Time'].astype(str))

### Top Payee

In [None]:
# Top payee (the recipient the most money was sent to).
# 'Recipient' is categorical, so observed=True restricts the groups to
# recipients that actually appear among the outgoing rows; otherwise every
# category would show up with a sum of 0.
outgoing_by_recipient = (
    df[df['TransactionAmount'] < 0]
    .groupby('Recipient', observed=True)['TransactionAmount']
    .sum()
)

# Outgoing amounts are negative, so the smallest sum is the largest spend
top_payee = outgoing_by_recipient.nsmallest(1)

if top_payee.empty:
    # No outgoing transactions at all: report that instead of failing on index[0]
    top_payee_name = None
    top_payee_amount = 0.0
    print("\nNo outgoing transactions found.")
else:
    top_payee_name = top_payee.index[0]
    top_payee_amount = abs(top_payee.values[0])  # Absolute value for easier reading

    print(f"\nTop Payee: {top_payee_name}")
    print(f"Total amount paid: {top_payee_amount:.2f}")

### Month-wise spending

In [None]:
# Index the transactions by timestamp so they can be resampled
df_dated = df.set_index('DateTime')

# Month-wise spending: sum the outgoing (negative) amounts per month-end
# bucket and report them as positive totals
is_outgoing = df_dated['TransactionAmount'] < 0
monthly_spending = (
    df_dated.loc[is_outgoing, 'TransactionAmount']
    .resample('M')
    .sum()
    .abs()
)

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=monthly_spending, ax=ax)
ax.set_title('Monthly Spending Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Spending')
plt.xticks(rotation=45)
plt.show()

### Spending by Day of the Week

In [None]:
# Tag every transaction with the weekday name of its timestamp
df['DayOfWeek'] = df['DateTime'].dt.day_name()

# Calendar order used both for the table and for the bars
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Total outgoing amount per weekday, as positive numbers, in calendar order
# (weekdays without any spending end up as NaN after the reindex)
outgoing = df[df['TransactionAmount'] < 0]
daily_spending = (
    outgoing.groupby('DayOfWeek')['TransactionAmount']
    .sum()
    .abs()
    .reindex(days_order)
)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=daily_spending.index, y=daily_spending.values, order=days_order, ax=ax)
ax.set_title('Total Spending by Day of Week')
ax.set_xlabel('Day of Week')
ax.set_ylabel('Total Spending')
plt.xticks(rotation=45)
plt.show()

### Frequency of Transactions to a Particular Recipient

In [None]:
def plot_transaction_frequency(df, recipient):
    """Plot a histogram of transaction timestamps for one recipient.

    Args:
        df: Transactions frame with 'Recipient' and 'DateTime' columns.
        recipient: Value matched against the 'Recipient' column.

    Displays a 30-bin histogram (with KDE overlay) of the 'DateTime' values
    of the matching rows; nothing is returned.
    """
    matches = df.loc[df['Recipient'].eq(recipient)]

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.histplot(matches['DateTime'], bins=30, kde=True, ax=ax)
    ax.set_title(f'Transaction Frequency for {recipient}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Number of Transactions')
    plt.xticks(rotation=45)
    plt.show()

# Example: plot for the top payee.
# Uses top_payee_name from the "Top Payee" cell; the previously referenced
# `top_payees` is not defined anywhere, so the cell failed with a NameError
# on a fresh kernel.
plot_transaction_frequency(df, top_payee_name)

### Distribution of Transaction amounts

In [None]:
# Histogram (50 bins, KDE overlay) of the signed transaction amounts
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(df['TransactionAmount'], bins=50, kde=True, ax=ax)
ax.set_title('Distribution of Transaction Amounts')
ax.set_xlabel('Transaction Amount')
ax.set_ylabel('Frequency')
plt.show()

### Heatmap of Transactions by Day and Hour

In [None]:
# Heatmap of transactions by day and hour

# Derive the two grouping keys from the combined timestamp
df['Hour'] = df['DateTime'].dt.hour
df['Day'] = df['DateTime'].dt.day_name()

# Calendar order for the heatmap columns
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Count the non-null TransactionAmount entries per (hour, weekday) cell,
# then arrange the weekday columns in calendar order
heatmap_data = (
    df.pivot_table(values='TransactionAmount', index='Hour', columns='Day', aggfunc='count')
    .reindex(columns=days_order)
)

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(heatmap_data, cmap='YlOrRd', annot=True, fmt='.0f', ax=ax)
ax.set_title('Transaction Heatmap by Day and Hour')
ax.set_xlabel('Day of Week')
ax.set_ylabel('Hour of Day')
plt.show()