In [None]:
import pandas as pd
import numpy as np
import requests
from datetime import date


import tensorflow as tf
from tensorflow.keras.applications import VGG16 # Excellent for feature extraction
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
import os

In [None]:
# Never hard-code or share your API key. It is read from the environment so it
# does not end up in the saved notebook, its outputs, or version control.
# Set it before starting the kernel, e.g. `export OPENWEATHER_API_KEY=...`.
# NOTE(review): any key that was previously committed in this cell should be
# revoked and regenerated in the OpenWeatherMap dashboard.
API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")
if not API_KEY:
    print("Warning: OPENWEATHER_API_KEY is not set; weather requests will be rejected.")

CITY_NAME = "London"
UNITS = "metric"
# The key is sent as a query parameter, so use HTTPS to keep it encrypted in transit.
BASE_URL = "https://api.openweathermap.org/data/2.5/weather"

In [None]:
def fetch_weather_data(city, api_key, units, timeout=10):
    """Fetch the current weather for a city and return the fields used downstream.

    Args:
        city: City name sent as the ``q`` query parameter.
        api_key: API key sent as the ``appid`` query parameter.
        units: Unit system sent as the ``units`` query parameter (e.g. "metric").
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        A dict with the keys ``max_temp``, ``current_temp``, ``feels_like``,
        ``description``, ``wind_speed`` and ``humidity``, or ``None`` when the
        request fails or the response does not have the expected structure
        (an error message is printed in both cases).
    """
    # 1. Build the query parameters
    params = {
        'q': city,
        'appid': api_key,
        'units': units
    }

    # 2. Make the API call
    try:
        response = requests.get(BASE_URL, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        data = response.json()

        # 3. Extract the required features from the JSON response
        main_block = data['main']
        weather_features = {
            'max_temp': main_block['temp_max'],
            'current_temp': main_block['temp'],
            'feels_like': main_block['feels_like'],
            'description': data['weather'][0]['description'],  # e.g., 'light rain'
            'wind_speed': data['wind']['speed'],
            'humidity': main_block['humidity']
        }

        return weather_features

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses.
        print(f"Error fetching weather data: {e}")
        return None
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # KeyError: missing field; IndexError: empty 'weather' list;
        # TypeError: payload is not a dict/list; ValueError: body is not valid JSON.
        print(f"Error parsing weather data (Missing key: {e}). API response structure may have changed.")
        return None

# 4. Smoke-test the fetcher against the configured city
weather_info = fetch_weather_data(city=CITY_NAME, api_key=API_KEY, units=UNITS)

if weather_info:
    header = f"\n--- Daily Weather Features For {CITY_NAME} ---"
    print(header)
    print(weather_info)
    # A Pandas Series is a convenient format to hand to an ML model later on
    weather_series = pd.Series(weather_info)


--- Daily Weather Features For London ---
{'max_temp': 16.67, 'current_temp': 15.9, 'feels_like': 15.63, 'description': 'broken clouds', 'wind_speed': 2.57, 'humidity': 80}


In [None]:
def engineer_weather_features(raw_data, current_date=None, occasion_formality=3.0):
    """Turn raw weather readings into the categorical/numeric features used for outfit scoring.

    Args:
        raw_data: Dict with at least the keys ``feels_like`` (degrees C),
            ``description`` (free-text weather description) and ``humidity``,
            or ``None``.
        current_date: Object exposing ``month`` and ``day`` (e.g. ``datetime.date``).
            Defaults to today's date.
        occasion_formality: Formality target stored under ``Occasion_Formality``.
            Defaults to 3.0, the value that used to be hard-coded.

    Returns:
        ``None`` when ``raw_data`` is ``None``; otherwise a dict with the keys
        ``Comfort_Category``, ``Precipitation_Risk`` (0/1/2),
        ``Harmattan_Impact`` (0/1/2), ``Feels_Like_Temp``, ``Humidity_Pct`` and
        ``Occasion_Formality``.

    Raises:
        KeyError: if ``raw_data`` lacks one of the required keys.
    """
    if raw_data is None:
        return None

    # Use today's date if not provided
    if current_date is None:
        current_date = date.today()

    temp = raw_data['feels_like']
    # Normalise case so 'Light Rain' and 'light rain' are treated the same.
    desc = str(raw_data['description']).lower()

    engineered_features = {}

    # --- 1. Comfort Category (based on feels_like temperature in C) ---
    if temp > 30.0:
        comfort_category = 'Hot/Humid'
    elif temp >= 24.0:
        comfort_category = 'Warm/Mild'
    elif temp >= 20.0:
        comfort_category = 'Cool/Layering'
    else:  # Less than 20C (typical Harmattan minimums)
        comfort_category = 'Cold/Harmattan'

    engineered_features['Comfort_Category'] = comfort_category

    # --- 2. Precipitation Risk ---
    # Descriptions such as 'heavy intensity rain' or 'extreme rain' do not contain
    # the literal substring 'heavy rain', so intensity and rain are checked separately.
    is_rain_like = 'rain' in desc or 'shower' in desc
    is_intense = 'heavy' in desc or 'extreme' in desc
    if 'thunderstorm' in desc or 'snow' in desc or (is_intense and is_rain_like):
        precipitation_risk = 2  # High Risk: Need heavy rain gear/waterproof shoes
    elif is_rain_like or 'drizzle' in desc:
        precipitation_risk = 1  # Low Risk: Need umbrella/light jacket
    else:
        precipitation_risk = 0  # None

    engineered_features['Precipitation_Risk'] = precipitation_risk

    # --- 3. Harmattan Impact (Date-Based) ---
    # Peak window: Dec 15 - Feb 15. Everything else in Nov-Mar is a shoulder period,
    # including Dec 1-14 and Feb 16-29, which lie between the shoulder months and the peak.
    current_month = current_date.month
    current_day = current_date.day

    in_peak_window = (
        current_month == 1
        or (current_month == 12 and current_day >= 15)
        or (current_month == 2 and current_day <= 15)
    )
    if in_peak_window:
        # A simple check; could be refined with humidity/wind checks
        harmattan_impact = 2  # Peak: Need cover/long sleeves due to dust/dryness
    elif current_month in (11, 12, 2, 3):
        harmattan_impact = 1  # Layering advised
    else:
        harmattan_impact = 0  # None

    engineered_features['Harmattan_Impact'] = harmattan_impact

    # --- 4. Final Numerical/User Features (for the ML Model) ---
    engineered_features['Feels_Like_Temp'] = temp
    engineered_features['Humidity_Pct'] = raw_data['humidity']

    # User-supplied occasion formality (defaults to 3.0, e.g. a casual lecture)
    engineered_features['Occasion_Formality'] = occasion_formality

    return engineered_features

# --- Running the Feature Engineering ---
# Only runs when the weather fetch succeeded; otherwise final_features stays undefined.
if weather_info:
    final_features = engineer_weather_features(weather_info)
    print("\n--- Engineered Features for ML Model ---", final_features, sep="\n")


--- Engineered Features for ML Model ---
{'Comfort_Category': 'Cold/Harmattan', 'Precipitation_Risk': 0, 'Harmattan_Impact': 1, 'Feels_Like_Temp': 15.63, 'Humidity_Pct': 80, 'Occasion_Formality': 3.0}


In [None]:
# Load VGG16 pre-trained on ImageNet, excluding the top (classification) layers
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# With include_top=False the model already ends at the last max-pooling layer,
# whose 7x7x512 output flattens to the 25088-value vector used downstream.
# Taking layers[-2] would select the convolution before that pool
# (14x14x512 = 100352 values) and would not match the 25088-wide placeholders.
feature_extractor = Model(inputs=base_model.input,
                          outputs=base_model.output)

# Freeze the layers (we are using it only for extraction, not training)
for layer in feature_extractor.layers:
    layer.trainable = False

In [None]:
def preprocess_image(img_path):
    """Load the image at ``img_path`` and return it as a VGG16-ready batch of one.

    The image is resized to 224x224, converted to an array, given a leading
    batch axis and passed through the VGG16 ``preprocess_input`` routine.
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))
    # Prepend the batch axis expected by the model
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    return tf.keras.applications.vgg16.preprocess_input(batch)

In [None]:
import io
import re

# 1. The shareable Google Sheets URL to load the wardrobe from
SHEET_URL = 'https://docs.google.com/spreadsheets/d/1-3MwJjgWSDZg-oEcf4PT8kEtHuXcexGeM5Y_3Xrx5xE/edit?gid=0#gid=0'

# 2. Pull the spreadsheet ID (always) and the gid (optional) out of the URL
match = re.search(r'/d/([a-zA-Z0-9_-]+)/edit(?:.*gid=(\d+))?', SHEET_URL)

if match is None:
    print("Error: Could not parse spreadsheet ID or gid from the URL.")
    export_url = None # Leave unset so the download step is skipped
else:
    # A gid that is absent from the URL falls back to '0' (the first tab)
    spreadsheet_id, gid = match.groups(default='0')
    # Build the CSV export endpoint for that sheet/tab
    export_url = f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}/export?format=csv&gid={gid}"

if export_url:
    # 3. Download the CSV export of the sheet
    try:
        # A timeout prevents the cell from hanging forever on a stalled connection.
        response = requests.get(export_url, timeout=30)
        response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
        # requests falls back to ISO-8859-1 for text/* responses without a charset,
        # which garbles non-ASCII item names. Pin UTF-8 explicitly.
        # NOTE(review): assumes the Sheets CSV export is UTF-8 encoded - confirm.
        response.encoding = 'utf-8'
        csv_content = io.StringIO(response.text)

        # 4. Read the data directly into a DataFrame from the CSV content
        # Using engine='python' can sometimes help with parsing difficult files
        wardrobe_df = pd.read_csv(csv_content, on_bad_lines='skip', engine='python')

        print("Data loaded successfully from Google Sheet:")
        display(wardrobe_df.head())

    except requests.exceptions.RequestException as e:
        # Network failure, timeout or HTTP error status.
        print(f"Error fetching data from Google Sheet: {e}")
    except Exception as e:
        # Best-effort: report parsing problems instead of aborting the notebook.
        # wardrobe_df is left undefined in that case, so later cells will fail.
        print(f"An unexpected error occurred: {e}")

Data loaded successfully from Google Sheet:


Unnamed: 0,item_id,image_path,category,sub-category,wamrth_score (1-5),Formality_score (1-5)
0,1,Arsen.jpg,top,jersey,2,1
1,2,art pants.jpg,bottom,gray pants,3,5
2,3,ather jackets.jpg,top,brown jacket,5,4
3,4,black jeans.jpg,bottom,black jeans,3,3
4,5,black round neck.jpg,top,black round neck,3,2


In [None]:
all_feature_vectors = []

# Length of one flattened embedding, derived from the extractor itself so the
# zero placeholders for missing images always match the real feature vectors.
embedding_size = int(np.prod(feature_extractor.output_shape[1:]))

# Image file names come from the 'image_path' column of the wardrobe sheet
for index, row in wardrobe_df.iterrows():
    img_path = os.path.join("/content/drive/MyDrive/Clothing Recommendation System/Data/Images/", row['image_path'])

    if os.path.exists(img_path):
        processed_img = preprocess_image(img_path)

        # Get the feature vector (embedding); verbose=0 avoids one progress bar per image
        features = feature_extractor.predict(processed_img, verbose=0)

        # Flatten the vector and append it
        all_feature_vectors.append(features.flatten())
    else:
        print(f"Image not found for Item_ID {row['item_id']}")
        # Append an all-zero placeholder so rows stay aligned with wardrobe_df
        all_feature_vectors.append(np.zeros(embedding_size))

# Convert the list of vectors into a DataFrame that shares the wardrobe index,
# so the concatenation below lines up row-for-row.
feature_df = pd.DataFrame(all_feature_vectors, index=wardrobe_df.index)

# Concatenate the new features with the original DataFrame.
# Note: re-running this cell appends another set of feature columns.
wardrobe_df = pd.concat([wardrobe_df, feature_df], axis=1)

# Save the updated DataFrame for use in Phase 3
wardrobe_df.to_csv("wardrobe_inventory_features.csv", index=False)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 690ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 519ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 513ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 510ms/step
Image not found for Item_ID 5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 492ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 506ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 488ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 504ms/step
Image not found for Item_ID 10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 489ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 488ms/step
Image not found for Item_ID 13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 513ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 475ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np
import os
import pandas as pd # Assuming you have already loaded your wardrobe_df

# --- Configuration ---
IMG_SIZE = (224, 224) # Standard input size for VGG16

# 1. Load the pre-trained VGG16 model
#    include_top=False strips the fully-connected classification layers.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))

# 2. Create the extractor model.
#    GlobalMaxPooling2D collapses the spatial grid of the last feature map into
#    one value per channel, giving a compact 512-value vector per image.
#    For the full 25088-value vector, flatten base_model.output instead
#    (layers[-2] is a convolution layer and would NOT give 25088 values).
feature_extractor = Model(inputs=base_model.input,
                          outputs=tf.keras.layers.GlobalMaxPooling2D()(base_model.output))

# Feature vector length, read from the model so it cannot drift from the
# architecture (it was previously hard-coded to 25088 while the model emits 512).
VECTOR_SIZE = feature_extractor.output_shape[1]

print(f"✅ VGG16 Extractor Model loaded. Output Feature Vector Size: {feature_extractor.output_shape[1]}")

# --- Image Preprocessing Function ---
def preprocess_image_for_vgg(img_path):
    """Return the image at ``img_path`` as a preprocessed VGG16 input batch of one.

    The image is resized to ``IMG_SIZE``, converted to an array, wrapped in a
    leading batch axis and run through the VGG16 ``preprocess_input`` routine.
    """
    resized = image.load_img(img_path, target_size=IMG_SIZE)
    batch = np.expand_dims(image.img_to_array(resized), axis=0)
    return tf.keras.applications.vgg16.preprocess_input(batch)

# --- Feature Extraction Loop ---
all_feature_vectors = []

# Base folder holding the wardrobe images; the sheet's 'image_path' column is
# expected to contain file names relative to this folder.
WARDROBE_IMAGE_BASE_PATH = "/content/drive/MyDrive/Clothing Recommendation System/Data/Images/"

# Build the absolute path of every image.
# If 'image_path' contained URLs instead, the images would have to be downloaded first.
wardrobe_df['Full_Path'] = wardrobe_df['image_path'].apply(lambda x: os.path.join(WARDROBE_IMAGE_BASE_PATH, x))

# Length of one embedding, used for the placeholders of missing images
embedding_size = feature_extractor.output_shape[1]

# Iterate over the path column only: building a full row object per item is
# needlessly slow once the frame carries thousands of feature columns.
for full_path in wardrobe_df['Full_Path']:
    if os.path.exists(full_path):
        processed_img = preprocess_image_for_vgg(full_path)

        # Predict the feature vector
        features = feature_extractor.predict(processed_img, verbose=0)

        all_feature_vectors.append(features.flatten())
    else:
        print(f"Image not found at: {full_path}")
        # Append an all-zero placeholder so rows stay aligned with wardrobe_df
        all_feature_vectors.append(np.zeros(embedding_size))

# Convert the list of vectors into a new DataFrame aligned on the wardrobe index
feature_df = pd.DataFrame(all_feature_vectors, index=wardrobe_df.index)

# Rename the feature columns (e.g., feature_0, feature_1, ...)
feature_df.columns = [f'feature_{i}' for i in range(feature_df.shape[1])]

# Concatenate the new features with the original DataFrame.
# Any feature_* columns left over from a previous run of this cell are dropped
# first, so re-running does not create duplicate column names.
wardrobe_df = pd.concat(
    [wardrobe_df.drop(columns=feature_df.columns, errors='ignore'), feature_df],
    axis=1,
)

print("✅ Feature Extraction Complete.")
print(f"Final DataFrame Shape: {wardrobe_df.shape}")

# Destination of the feature table on Drive (consumed by Phase 3/4)
output_path = "/content/drive/MyDrive/Clothing Recommendation System/data/wardrobe_inventory_features.csv"

# Create the parent folder first if it is missing, then write the CSV without the index
output_dir = os.path.dirname(output_path)
os.makedirs(output_dir, exist_ok=True)
wardrobe_df.to_csv(output_path, index=False)

✅ VGG16 Extractor Model loaded. Output Feature Vector Size: 512
Image not found at: /content/drive/MyDrive/Clothing Recommendation System/Data/Images/black round neck.jpg
Image not found at: /content/drive/MyDrive/Clothing Recommendation System/Data/Images/cardgian.jpg
Image not found at: /content/drive/MyDrive/Clothing Recommendation System/Data/Images/carg short.jpg
Image not found at: /content/drive/MyDrive/Clothing Recommendation System/Data/Images/green round neck.jpg
Image not found at: /content/drive/MyDrive/Clothing Recommendation System/Data/Images/long sleeve shirt.jpg
Image not found at: /content/drive/MyDrive/Clothing Recommendation System/Data/Images/Long-sleeve sweartshirt.jpg
Image not found at: /content/drive/MyDrive/Clothing Recommendation System/Data/Images/round neck short sleeve,jpg
Image not found at: /content/drive/MyDrive/Clothing Recommendation System/Data/Images/shortss.jpg
Image not found at: /content/drive/MyDrive/Clothing Recommendation System/Data/Images/vi

In [None]:
# Context for today, taken from the engineered weather features (Phase 1).
# The three weather-derived entries are copied over under the same key names.
daily_context = {
    key: final_features[key]
    for key in ('Comfort_Category', 'Precipitation_Risk', 'Harmattan_Impact')
}
# Formality the outfit should aim for
daily_context['Occasion_Formality_Target'] = final_features['Occasion_Formality']
# Max warmth score tolerated for the day.
# NOTE(review): this is a fixed value and is not derived from the weather - confirm intent.
daily_context['Occasion_Warmth_Tolerance'] = 1

# Item categories that together make up a full outfit
OUTFIT_COMPONENTS = ['top', 'bottom', 'feet']

In [None]:
from itertools import product
import random

# Give the two score columns short names that work as attribute names
wardrobe_df = wardrobe_df.rename(columns={
    'wamrth_score (1-5)': 'warmth_score',
    'Formality_score (1-5)': 'formality_score'
})

# One sub-frame of candidate items per required outfit component
item_pools = {
    component: wardrobe_df.loc[wardrobe_df['category'] == component]
    for component in OUTFIT_COMPONENTS
}

# Items that form an outfit on their own are kept separately
complete_garments = wardrobe_df.loc[wardrobe_df['category'] == 'Complete_Garment']

# Every (top, bottom, feet) combination; outerwear is not a mandatory component
standard_combinations = list(product(
    *(item_pools[component].itertuples() for component in ('top', 'bottom', 'feet'))
))

# Collects one result dict per scored outfit
scored_outfits = []

# --- Logic Placeholder: Compatibility Model ---
def get_compatibility_score(outfit_items):
    """Return a placeholder compatibility score for an outfit.

    Stand-in for the trained model planned for Phase 3: the outfit scores 0.3
    when the gap between its most and least formal item exceeds 3.0, and 0.8
    otherwise. Every item must expose a ``formality_score`` attribute.
    """
    scores = [piece.formality_score for piece in outfit_items]
    formality_gap = max(scores) - min(scores)
    # A gap above 3 means something like a 5 (Agbada) paired with a 1 (Gym shorts)
    return 0.3 if formality_gap > 3.0 else 0.8


# --- Scoring Logic ---
# Each outfit starts from its compatibility score and loses points for being
# too warm, for lacking rain protection on a rainy day, and for missing the
# formality target.
for outfit_tuple in standard_combinations:

    # Outfit-level warmth/formality are the plain averages of its items
    avg_warmth = np.mean([piece.warmth_score for piece in outfit_tuple])
    avg_formality = np.mean([piece.formality_score for piece in outfit_tuple])

    # 1. Compatibility (placeholder model)
    compatibility_score = get_compatibility_score(outfit_tuple)

    # 2. Appropriateness penalties
    penalty = 0

    # A. Warmth: half a point per warmth unit above the tolerated maximum
    warmth_excess = avg_warmth - daily_context['Occasion_Warmth_Tolerance']
    if warmth_excess > 0:
        penalty += 0.5 * warmth_excess

    # B. Rain: flat penalty when rain is expected and no item is rain resistant
    #    (items without an Is_Rain_Resistant attribute count as not resistant)
    if daily_context['Precipitation_Risk'] > 0:
        has_rain_gear = any(getattr(piece, 'Is_Rain_Resistant', False) for piece in outfit_tuple)
        if not has_rain_gear:
            penalty += 0.7

    # C. Formality: one point per unit of distance from the target
    formality_mismatch = abs(avg_formality - daily_context['Occasion_Formality_Target'])
    penalty += 1.0 * formality_mismatch

    # --- Final Score Calculation ---
    final_score = compatibility_score - penalty

    result = {
        'items': [piece.item_id for piece in outfit_tuple],
        'score': final_score,
        'avg_formality': avg_formality,
        'avg_warmth': avg_warmth
    }
    scored_outfits.append(result)

# --- Final Step: Rank and Recommend ---
# Highest score first; ties keep the order in which the outfits were generated.
final_recommendations = sorted(scored_outfits, key=lambda x: x['score'], reverse=True)

# Lookup table from item_id to its display name. Resolving each id through this
# table keeps the printed names in outfit order (top, bottom, feet); filtering
# the DataFrame with isin() would list them in row order instead.
name_by_id = dict(zip(wardrobe_df['item_id'], wardrobe_df['sub-category']))

print("\n--- TOP 3 Outfit Recommendations ---")
for i, rec in enumerate(final_recommendations[:3]):
    # str() guards against non-string names (e.g. an empty cell read as NaN);
    # an id missing from the table is shown as the id itself.
    item_names = [str(name_by_id.get(item_id, item_id)) for item_id in rec['items']]
    print(f"#{i+1}: {', '.join(item_names)}")
    print(f"   Score: {rec['score']:.2f} | Warmth: {rec['avg_warmth']:.1f} | Formality: {rec['avg_formality']:.1f}")


--- TOP 3 Outfit Recommendations ---
#1: Man city, shorts 1, black palm 1
   Score: 0.30 | Warmth: 2.0 | Formality: 3.0
#2: Man city, black palm 1, black shorts
   Score: 0.30 | Warmth: 2.0 | Formality: 3.0
#3: shorts 1, black palm 1, white round neck
   Score: 0.30 | Warmth: 2.0 | Formality: 3.0
