In [1]:
"""
Weather-Based Recommendation System
Author: Abhiram Ramakrishnan
Description: This script trains a Machine Learning model to predict recommendations 
(clothing, activity, travel, food) based on weather conditions.
"""

# Import Required Libraries
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report



In [2]:
# ------------- 1. Loading and Cleaning the Dataset ------------- #


DATA_PATH = "DWD.csv"  


if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"Dataset not found at path: {DATA_PATH}")

df = pd.read_csv(DATA_PATH, encoding="ISO-8859-1", delimiter=";")


df.rename(columns=lambda x: x.strip().replace(",,,,,,,,,,,,,,,,,", ""), inplace=True)


numeric_columns = [
    "LAT", "LON", "ALTITUDE", "MEAN ANNUAL AIR TEMP", "MEAN MONTHLY MAX TEMP",
    "MEAN MONTHLY MIN TEMP", "MEAN ANNUAL WIND SPEED", "MEAN CLOUD COVER",
    "MEAN ANNUAL SUNSHINE", "MEAN ANNUAL RAINFALL", "MAX MONTHLY WIND SPEED",
    "MAX AIR TEMP", "MAX WIND SPEED", "MAX RAINFALL", "MIN AIR TEMP"
]

for col in numeric_columns:
    df[col] = df[col].astype(str).str.replace(",", ".").astype(float)

df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())

In [3]:
# ------------- 2. Define Recommendation Categories ------------- #

def clothing_recommendation(temp, rain):
    if temp < 10:
        return "Wear a jacket, gloves, scarf"
    elif 10 <= temp <= 20:
        return "Light jacket, jeans"
    else:
        return "Shorts, t-shirts, sunglasses" if rain < 1 else "Carry an umbrella, wear waterproof clothing"

def activity_recommendation(temp, cloud, wind, rain):
    if rain > 1:
        return "Stay indoors, read books, visit museums"
    elif temp > 20 and cloud < 50:
        return "Outdoor sports, hiking, cycling"
    elif cloud > 50:
        return "City walks, jogging"
    elif wind > 10:
        return "Indoor activities like yoga, gym"
    else:
        return "Balanced activities, choose based on preference"

def travel_recommendation(temp, rain, wind):
    if temp > 25 and rain < 1:
        return "Beach trips, lakeside visits"
    elif temp < 5 and wind > 10:
        return "Ski resorts, winter travel spots"
    elif rain > 5:
        return "Avoid travel, stay indoors"
    else:
        return "Moderate travel, short trips"

def food_recommendation(temp, rain):
    if temp < 10:
        return "Hot coffee, soups, warm meals"
    elif temp > 25:
        return "Ice cream, cold drinks"
    elif rain > 1:
        return "Tea, comfort food like ramen"
    else:
        return "Regular meals, seasonal choices"


df["Clothing Recommendation"] = df.apply(lambda x: clothing_recommendation(x["MEAN ANNUAL AIR TEMP"], x["MEAN ANNUAL RAINFALL"]), axis=1)
df["Activity Recommendation"] = df.apply(lambda x: activity_recommendation(x["MEAN ANNUAL AIR TEMP"], x["MEAN CLOUD COVER"], x["MEAN ANNUAL WIND SPEED"], x["MEAN ANNUAL RAINFALL"]), axis=1)
df["Travel Recommendation"] = df.apply(lambda x: travel_recommendation(x["MEAN ANNUAL AIR TEMP"], x["MEAN ANNUAL RAINFALL"], x["MEAN ANNUAL WIND SPEED"]), axis=1)
df["Food Recommendation"] = df.apply(lambda x: food_recommendation(x["MEAN ANNUAL AIR TEMP"], x["MEAN ANNUAL RAINFALL"]), axis=1)


In [4]:
# ------------- 3. Preprocessing for Model Training ------------- #

features = numeric_columns
target_labels = ["Clothing Recommendation", "Activity Recommendation", "Travel Recommendation", "Food Recommendation"]

label_encoders = {}
for label in target_labels:
    le = LabelEncoder()
    df[label] = le.fit_transform(df[label])  
    label_encoders[label] = le  

X = df[features]
y = df[target_labels]

X.fillna(X.mean(), inplace=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.fillna(X.mean(), inplace=True)


In [5]:
# ------------- 4. Train Machine Learning Model ------------- #

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

In [6]:
# ------------- 5. Evaluate Model Performance ------------- #

y_pred = model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

print("Classification Report:\n", classification_report(y_test, y_pred))

Model Accuracy: 100.00%
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       113
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       1.00      1.00      1.00         7

   micro avg       1.00      1.00      1.00       120
   macro avg       0.50      0.50      0.50       120
weighted avg       1.00      1.00      1.00       120
 samples avg       1.00      1.00      1.00       120



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
# ------------- 6. Test with New Data ------------- #

sample_input = np.array([[15.0, 18.0, 10.0, 3.0, 60.0, 1200.0, 500.0, 2.0, 30.0, 15.0, 50.0, -5.0, 7.0, 3.5, 68.0]])

if sample_input.shape[1] != X_train.shape[1]:
    raise ValueError(f"Sample input must have {X_train.shape[1]} features, but got {sample_input.shape[1]}.")

sample_input_scaled = scaler.transform(sample_input)

sample_prediction = model.predict(sample_input_scaled)

decoded_predictions = {label: label_encoders[label].inverse_transform([pred])[0] for label, pred in zip(target_labels, sample_prediction[0])}

print("\nRecommended Based on Weather Conditions:")
for category, recommendation in decoded_predictions.items():
    print(f"{category}: {recommendation}")



Recommended Based on Weather Conditions:
Clothing Recommendation: Wear a jacket, gloves, scarf
Activity Recommendation: Stay indoors, read books, visit museums
Travel Recommendation: Avoid travel, stay indoors
Food Recommendation: Hot coffee, soups, warm meals


