In [None]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from datetime import datetime
from meteostat import Point, Daily

In [None]:
# --- Load Dataset ---
# Read the retail sales CSV straight from its raw GitHub URL into a DataFrame.
url = "https://raw.githubusercontent.com/MaharLeika18/Data-Mining---Python/refs/heads/main/Final_Exam/retail_sales_dataset.csv"
data = pd.read_csv(filepath_or_buffer=url)

# Confirmation banner followed by the default five-row preview, one item per line.
print("Data loaded successfully!", data.head(n=5), sep="\n")

In [None]:
# --- Data Preprocessing ---
# Collect, for each customer, the list of product categories they purchased.
transactions = (
    data.groupby('Customer ID')['Product Category']
    .apply(list)
    .tolist()
)

# One-hot encode all items: learn the set of distinct items first, then
# transform every basket into a row of per-item indicator values.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

print("\nTransaction data prepared for analysis:", df.head(), sep="\n")

In [None]:
# Parse the 'Date' column into datetimes, then capture the earliest and
# latest dates; they bound the weather query made further down.
data['Date'] = pd.to_datetime(data['Date'])
start, end = data['Date'].min(), data['Date'].max()
print(f"Date range: {start.date()} to {end.date()}")

In [None]:
# Get historical daily weather data for Manila over the sales date range.
# NOTE(review): the Point arguments look like latitude, longitude and
# altitude -- confirm against the meteostat documentation.
location = Point(14.5995, 120.9842, 70)

# Build the daily query and download the result in one step.
weather_data = Daily(location, start, end).fetch()

weather_data.head()

In [None]:
# Add description to the data
def get_weather_description(row):
    """Map one weather record to a short text label.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['tavg']`` (average temperature) and
        ``row['prcp']`` (precipitation), e.g. a DataFrame row or a dict.

    Returns
    -------
    str
        ``"Unknown"`` when either reading is missing. Otherwise
        precipitation decides first (``"Heavy Rain"`` above 10,
        ``"Rainy"`` above 5, ``"Light Rain"`` above 1); when precipitation
        is at most 1, temperature decides (``"Sunny"`` above 29,
        ``"Partly Cloudy"`` above 27, else ``"Warm and Humid"``).
    """
    avg_temp, rainfall = row['tavg'], row['prcp']

    # A label needs both readings; bail out if either one is missing.
    if pd.isna(avg_temp) or pd.isna(rainfall):
        return "Unknown"

    # Thresholds are strict lower bounds, checked from largest to smallest.
    rain_labels = ((10, "Heavy Rain"), (5, "Rainy"), (1, "Light Rain"))
    for lower_bound, label in rain_labels:
        if rainfall > lower_bound:
            return label

    # Little or no rain: fall back to temperature bands.
    temperature_labels = ((29, "Sunny"), (27, "Partly Cloudy"))
    for lower_bound, label in temperature_labels:
        if avg_temp > lower_bound:
            return label

    return "Warm and Humid"

# Apply weather descriptions: classify each record row by row and store the
# label in a new 'weather' column.
weather_data['weather'] = weather_data.apply(get_weather_description, axis='columns')

# Show how many records fall under each label.
print("Weather text descriptions:", weather_data['weather'].value_counts(), sep="\n")
weather_data.head(10)[['tavg', 'prcp', 'weather']]

In [None]:
# Create weather_data_reset for merging: move the index into a regular column
# and rename that 'time' column to 'Date' so it matches the sales join key.
# rename() returns a new frame here instead of mutating one in place.
weather_data_reset = weather_data.reset_index().rename(columns={'time': 'Date'})

print(f"Shape: {weather_data_reset.shape}")
display(weather_data_reset[['Date', 'weather']].head())

display(weather_data_reset)
# Merge: a left join keeps every sales row, attaching weather values where the
# dates match. validate='many_to_one' makes pandas raise a MergeError if a
# date appears more than once in the weather frame, which would otherwise
# silently duplicate sales rows.
# NOTE(review): assumes data['Date'] has already been converted to datetime
# so the key dtypes agree -- confirm the conversion cell ran first.
data_with_weather = pd.merge(
    data,
    weather_data_reset[['Date', 'weather', 'tavg', 'prcp', 'wspd']],
    on='Date',
    how='left',
    validate='many_to_one',
)

In [None]:
# Export the merged sales-and-weather table to CSV (index omitted), then
# display the same frame in the notebook.
data_with_weather.to_csv(path_or_buf='retail_data_with_weather.csv', index=False)
display(data_with_weather)