# In [None]:
# Task 1


# First, I need to import the libraries I'll use
# pandas helps work with data tables (DataFrames)
# numpy helps with math calculations
import pandas as pd
import numpy as np


# Import the file-upload helper from Google Colab
from google.colab import files


# Ask for the input files. The call below shows upload buttons that must be
# clicked; the CSVs are then read by name, so they need to be called
# cc_info.csv and transactions.csv.
print("Please upload your CSV files when the upload button appears...")
uploaded = files.upload()


# Step 1: Read both CSV files into DataFrames
# cc_df holds the credit card info, trans_df holds the transactions
print("Step 1: Loading the data files...")
cc_info_path, transactions_path = 'cc_info.csv', 'transactions.csv'
cc_df = pd.read_csv(cc_info_path)
trans_df = pd.read_csv(transactions_path)


# Step 2: Preview the contents of both tables
print("\nStep 2: Looking at the first 5 rows of each file")
# head() with no argument returns the first 5 rows; sep="\n" puts the table
# on the line after its label, exactly as two separate print calls would
print("\nCredit Card Info first 5 rows:", cc_df.head(), sep="\n")
print("\nTransactions first 5 rows:", trans_df.head(), sep="\n")


# Step 3: Report the size of each table
# .shape is a (number of rows, number of columns) tuple
print("\nStep 3: Checking the size of our data")
print(
    f"Credit Card Info shape: {cc_df.shape}",
    f"Transactions shape: {trans_df.shape}",
    sep="\n",
)


# Step 4: What kind of data is in each column?
# DataFrame.info() prints its own report and returns None, so it is called
# directly. Wrapping it in print() would add a stray "None" line after each
# report.
print("\nStep 4: Checking data types in each file")
print("\nCredit Card Info data types:")
cc_df.info()
print("\nTransactions data types:")
trans_df.info()


# Step 5: Count the missing values in every column
print("\nStep 5: Checking for missing values")
# isnull() marks each missing cell as True; sum() then counts them per column
cc_missing_counts = cc_df.isnull().sum()
print("\nMissing values in Credit Card Info:", cc_missing_counts, sep="\n")
trans_missing_counts = trans_df.isnull().sum()
print("\nMissing values in Transactions:", trans_missing_counts, sep="\n")


# Step 6: Summary statistics for the transactions table
print("\nStep 6: Getting statistics for numbers in Transactions")
transactions_summary = trans_df.describe()
print("\nDescriptive statistics for Transactions:", transactions_summary, sep="\n")


# Step 7: Statistics on the transaction amounts
# The amount column is pulled out as a NumPy array so NumPy can do the math
print("\nStep 7: Analyzing transaction amounts")
amounts = trans_df['transaction_dollar_amount'].to_numpy()


# Mean, smallest and largest amount, each computed by NumPy
mean_amount, min_amount, max_amount = (
    np.mean(amounts),
    np.min(amounts),
    np.max(amounts),
)


# Report each value as dollars with two decimal places
print("\nTransaction Amount Statistics using NumPy:")
print(
    f"Mean amount: ${mean_amount:.2f}",
    f"Minimum amount: ${min_amount:.2f}",
    f"Maximum amount: ${max_amount:.2f}",
    sep="\n",
)




# Task 2


# Step 1: Pick out the transactions over $500
print("\nStep 1: Finding big transactions (over $500)")
# Boolean mask: True for each row whose amount is strictly greater than 500
is_over_500 = trans_df['transaction_dollar_amount'] > 500
big_transactions = trans_df[is_over_500]
print("\nHere are the first few big transactions:", big_transactions.head(), sep="\n")
print(f"Total number of transactions over $500: {len(big_transactions)}")


# Step 2: Build a narrower DataFrame holding only three columns
print("\nStep 2: Creating a simpler version of our transactions")
# The columns to keep, in the order they should appear
kept_columns = ['credit_card', 'transaction_dollar_amount', 'date']
simple_transactions = trans_df[kept_columns]
print("\nHere's what our simplified data looks like:", simple_transactions.head(), sep="\n")


# Step 3: Add a new column to categorize transactions
print("\nStep 3: Categorizing transactions as High, Medium, or Low")


def categorize_amount(amount, *, high_threshold=1000, medium_threshold=500):
    """Return the size category of a single transaction amount.

    Both comparisons are strict, so an amount exactly equal to a threshold
    falls into the lower category (with the defaults, 1000 is 'Medium' and
    500 is 'Low'). A NaN amount compares false against both thresholds and
    is therefore labelled 'Low'.

    Args:
        amount: The transaction amount to classify.
        high_threshold: Amounts strictly above this are 'High'.
        medium_threshold: Amounts strictly above this (but not above
            ``high_threshold``) are 'Medium'.

    Returns:
        'High', 'Medium' or 'Low'.
    """
    if amount > high_threshold:
        return 'High'
    if amount > medium_threshold:
        return 'Medium'
    return 'Low'


# Label every transaction by running categorize_amount on each amount, then
# store the labels in a new amount_category column
amount_labels = trans_df['transaction_dollar_amount'].apply(categorize_amount)
trans_df['amount_category'] = amount_labels
preview_columns = ['transaction_dollar_amount', 'amount_category']
print("\nHere's our data with categories:", trans_df[preview_columns].head(), sep="\n")


# Step 4: Join the credit card info onto the transactions
print("\nStep 4: Combining credit card info with transactions")
# Left join on credit_card: every transaction row is kept, and the matching
# cc_df columns are attached to it
merged_df = pd.merge(left=trans_df, right=cc_df, how='left', on='credit_card')
print("\nHere's what our combined data looks like:", merged_df.head(), sep="\n")


# Step 5: Average transaction amount for each state
print("\nStep 5: Finding average transaction amount by state")
# Group the amount column by state, then take the mean of each group
amounts_by_state = merged_df.groupby('state')['transaction_dollar_amount']
state_averages = amounts_by_state.mean()
print("\nHere are the average transaction amounts by state:", state_averages, sep="\n")


# Same numbers again, one state per line, formatted as dollars
print("\nFormatted average transaction amounts by state:")
for state, amount in zip(state_averages.index, state_averages):
    print(f"{state}: ${amount:.2f}")


# Optional extra: which states have the highest and lowest averages
print("\nExtra Analysis:")
print(
    f"State with highest average transactions: {state_averages.idxmax()}",
    f"State with lowest average transactions: {state_averages.idxmin()}",
    sep="\n",
)


import pandas as pd

# Flag high-value transactions that follow another high-value transaction on
# the same card within 3 days.
# Assumes the transactions DataFrame is already loaded and is called trans_df.

# 1. Keep only transactions greater than $300. The explicit copy makes this an
#    independent DataFrame, so the column assignments below act on it alone
#    and do not raise pandas' SettingWithCopyWarning.
high_value_transactions = trans_df[trans_df['transaction_dollar_amount'] > 300].copy()

# 2. Parse the dates BEFORE sorting. Sorting the raw text is chronological only
#    for some formats; any other format would put rows out of order and
#    produce negative gaps, which would then be flagged as suspicious.
high_value_transactions['date'] = pd.to_datetime(high_value_transactions['date'])

# 3. Sort by card, then chronologically within each card
high_value_transactions = high_value_transactions.sort_values(by=['credit_card', 'date'])

# 4. Time since the previous high-value transaction on the same card
#    (NaT for the first such transaction of each card)
high_value_transactions['time_diff'] = high_value_transactions.groupby('credit_card')['date'].diff()

# 5. Suspicious = at most 3 days after the previous high-value transaction.
#    A comparison against NaT is False, so the first transaction of each card
#    is never flagged and no fillna() is needed.
high_value_transactions['suspicious'] = high_value_transactions['time_diff'] <= pd.Timedelta(days=3)

# 6. Print relevant columns
print(high_value_transactions[['credit_card', 'transaction_dollar_amount', 'date', 'suspicious']])



# :