# CO2 Emission Prediction & Sustainability Tips Model
## Train the prediction model and build a vector store for sustainability recommendations

In [None]:
# Install required libraries (IPython shell escape: the line is handed to pip).
# NOTE(review): langchain and langchain-community are not imported by any of
# the cells visible in this notebook — confirm they are still needed.
!pip install pandas scikit-learn sentence-transformers chromadb langchain langchain-community joblib

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import json

## Step 1: Load and Prepare Data

In [None]:
# Upload your CSV file (Colab only: opens a file picker in the browser).
from google.colab import files
uploaded = files.upload()

# An empty result means no file was chosen. Report it here instead of leaving
# the loading cell to fail without context; this is deliberately a warning and
# not an error, because the CSV may already be present from an earlier upload.
if not uploaded:
    print('Warning: no file was uploaded.')

In [None]:
# Load dataset.
# Colab may store a re-uploaded file under a suffixed name such as
# 'activity_co2_emission_data (1).csv', so a hard-coded path can read a stale
# copy or miss the file entirely. Prefer the name reported by the upload cell
# and fall back to the default name when nothing was uploaded in this session.
default_csv = 'activity_co2_emission_data.csv'
# `uploaded` only exists if the upload cell has been run; tolerate its absence.
uploaded_names = list(globals().get('uploaded') or {})
if not uploaded_names or default_csv in uploaded_names:
    csv_path = default_csv
else:
    # First uploaded CSV; if no name ends in .csv, take the first uploaded file.
    csv_path = next(
        (name for name in uploaded_names if name.lower().endswith('.csv')),
        uploaded_names[0],
    )

df = pd.read_csv(csv_path)
print('Dataset shape:', df.shape)
print('\nFirst few rows:')
# Left as the last expression so the notebook renders the preview table.
df.head()

In [None]:
# Data preprocessing: integer-encode the two categorical columns.
# One encoder per column; both fitted encoders are reused by later cells
# (they are saved to disk and used for the test prediction).
label_encoder_activity = LabelEncoder()
label_encoder_category = LabelEncoder()

# assign() returns a new frame, so the raw `df` is left unmodified.
df_processed = df.assign(
    Activity_Encoded=label_encoder_activity.fit_transform(df['Activity']),
    Category_Encoded=label_encoder_category.fit_transform(df['Category']),
)

print('\nEncoded data:')
print(df_processed.head())

## Step 2: Train Random Forest Model

In [None]:
# Select the encoded feature columns and the regression target
feature_columns = ['Activity_Encoded', 'Category_Encoded']
target_column = 'AVG CO2 emission'
X = df_processed[feature_columns]
y = df_processed[target_column]

# Hold out 20% of the rows for evaluation; the seed is fixed at 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the random forest on the training split
forest_params = {'n_estimators': 100, 'random_state': 42, 'max_depth': 10}
model = RandomForestRegressor(**forest_params)
model.fit(X_train, y_train)

# Score the held-out split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('Model Performance:')
print(f'MSE: {mse:.4f}')
print(f'R2 Score: {r2:.4f}')

## Step 3: Create Sustainability Tips Database

In [None]:
# Comprehensive sustainability tips.
# Each entry is a dict with five string fields:
#   category       - 'Transport', 'Household' or 'General'
#   activity       - the activity the tip applies to ('All' for generic tips)
#   emission_level - 'High', 'Medium', 'Zero' or 'All'
#   tip            - the recommendation text
#   impact         - a short statement of the expected effect
sustainability_tips = [
    {'category': 'Transport', 'activity': 'Car', 'emission_level': 'High', 'tip': 'Switch to public transportation like buses or trains to reduce your carbon footprint by up to 75%. Carpooling with colleagues can also significantly reduce emissions per person.', 'impact': 'Can save 4.6 kg CO2 per 20km by switching to bus'},
    {'category': 'Transport', 'activity': 'Car', 'emission_level': 'High', 'tip': 'Consider switching to an electric vehicle (EV) or hybrid car. EVs produce zero direct emissions and are becoming more affordable with government incentives.', 'impact': 'Reduce emissions by 60-70% compared to petrol cars'},
    {'category': 'Transport', 'activity': 'Car', 'emission_level': 'High', 'tip': 'Maintain optimal tire pressure and regular vehicle servicing. This improves fuel efficiency by 10-15% and reduces unnecessary emissions.', 'impact': 'Save 0.5-0.7 kg CO2 per 20km journey'},
    {'category': 'Transport', 'activity': 'Bus', 'emission_level': 'Medium', 'tip': 'You are already making an eco-friendly choice! Consider cycling or walking for shorter distances (under 5km) to further reduce emissions and improve health.', 'impact': 'Zero emissions for short trips'},
    {'category': 'Transport', 'activity': 'Bus', 'emission_level': 'Medium', 'tip': 'Optimize your route planning to minimize total travel distance. Combining multiple errands in one trip reduces overall emissions.', 'impact': 'Save 20-30% emissions through efficient planning'},
    {'category': 'Transport', 'activity': 'Bicycle', 'emission_level': 'Zero', 'tip': 'Excellent choice! Cycling produces zero emissions. Continue this habit and inspire others to cycle. Ensure regular bicycle maintenance for safety and efficiency.', 'impact': 'Zero emissions, saves 4.6 kg CO2 per 20km vs car'},
    {'category': 'Transport', 'activity': 'Bicycle', 'emission_level': 'Zero', 'tip': 'Join local cycling communities to promote cycling infrastructure in your area. Advocate for more bike lanes and secure parking facilities.', 'impact': 'Community-wide emission reduction potential'},
    {'category': 'Household', 'activity': 'AC usage', 'emission_level': 'High', 'tip': 'Set your AC temperature to 24-26°C instead of lower temperatures. Each degree higher can save 3-5% energy and reduce emissions significantly.', 'impact': 'Save 0.3-0.5 kg CO2 per day'},
    {'category': 'Household', 'activity': 'AC usage', 'emission_level': 'High', 'tip': 'Use ceiling fans in combination with AC. This allows you to set AC at higher temperatures while maintaining comfort, reducing energy consumption by 30-40%.', 'impact': 'Save 1.8-2.4 kg CO2 per day'},
    {'category': 'Household', 'activity': 'AC usage', 'emission_level': 'High', 'tip': 'Install solar panels to power your AC with renewable energy. Government subsidies can reduce installation costs by 30-40%.', 'impact': 'Reduce emissions by up to 90% for AC usage'},
    {'category': 'Household', 'activity': 'AC usage', 'emission_level': 'High', 'tip': 'Ensure proper insulation and seal air leaks in your home. This reduces cooling load and can cut AC energy consumption by 25-30%.', 'impact': 'Save 1.5-1.8 kg CO2 per day'},
    {'category': 'Household', 'activity': 'AC usage', 'emission_level': 'High', 'tip': 'Clean AC filters monthly. Dirty filters reduce efficiency by 15% and increase energy consumption significantly.', 'impact': 'Save 0.9 kg CO2 per day'},
    {'category': 'General', 'activity': 'All', 'emission_level': 'All', 'tip': 'Track your carbon footprint regularly using apps or journals. Awareness is the first step towards sustainable living.', 'impact': 'Behavioral change can reduce emissions by 20-30%'},
    {'category': 'General', 'activity': 'All', 'emission_level': 'All', 'tip': 'Participate in local environmental initiatives and tree plantation drives. Trees absorb CO2 and improve air quality.', 'impact': 'One tree absorbs 22 kg CO2 per year'},
    {'category': 'General', 'activity': 'All', 'emission_level': 'All', 'tip': 'Choose renewable energy providers for your home electricity. Many regions now offer green energy options at competitive prices.', 'impact': 'Reduce household emissions by 50-70%'},
    {'category': 'Transport', 'activity': 'All', 'emission_level': 'All', 'tip': 'Consider work-from-home options 2-3 days per week if possible. This eliminates commute emissions and improves work-life balance.', 'impact': 'Save 40-60% of weekly commute emissions'},
    {'category': 'Household', 'activity': 'Energy', 'emission_level': 'Medium', 'tip': 'Switch to LED bulbs throughout your home. LEDs use 75% less energy and last 25 times longer than traditional bulbs.', 'impact': 'Save 50-80 kg CO2 per year per household'},
    {'category': 'Household', 'activity': 'Energy', 'emission_level': 'Medium', 'tip': 'Unplug electronic devices when not in use. Phantom power consumption accounts for 5-10% of residential energy use.', 'impact': 'Save 100-200 kg CO2 per year'}
]

# Serialise the tips to pretty-printed JSON and write them to the working directory
tips_json = json.dumps(sustainability_tips, indent=2)
with open('sustainability_tips.json', 'w') as tips_file:
    tips_file.write(tips_json)

print(f'Created {len(sustainability_tips)} sustainability tips')

## Step 4: Build Vector Store with ChromaDB

In [None]:
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings

In [None]:
# Load the sentence-embedding model that the tips are encoded with below
embedding_model_name = 'all-MiniLM-L6-v2'
embedding_model = SentenceTransformer(embedding_model_name)
print('Embedding model loaded successfully')

In [None]:
# Chroma client with telemetry disabled and reset() permitted.
# NOTE(review): chromadb.Client appears to be the non-persistent (in-memory)
# client, and the collection is not among the files saved in Step 5 or
# downloaded in Step 7 — confirm the vector store is rebuilt wherever the
# tips are served, or switch to a persistent client.
chroma_client = chromadb.Client(Settings(anonymized_telemetry=False, allow_reset=True))

# get_or_create_collection keeps this cell re-runnable within one session:
# create_collection raises when a collection with this name already exists.
collection = chroma_client.get_or_create_collection(
    name='sustainability_tips',
    metadata={'description': 'Sustainability tips for CO2 reduction'},
)
print('ChromaDB collection created')

In [None]:
# Text that gets embedded for each tip: category, activity and emission level
# are prefixed to the tip so they contribute to the embedding as well.
texts_to_embed = [
    f"{tip['category']} {tip['activity']} {tip['emission_level']}: {tip['tip']}"
    for tip in sustainability_tips
]

# Encode every text in one batched call instead of one model call per tip.
embeddings = embedding_model.encode(texts_to_embed).tolist()

# One upsert replaces the per-tip add() calls. upsert overwrites ids that are
# already present, so re-running this cell refreshes the entries instead of
# colliding with the ids written by the previous run.
collection.upsert(
    ids=[f'tip_{idx}' for idx in range(len(sustainability_tips))],
    embeddings=embeddings,
    # Only the tip text is stored as the document; the rest goes into metadata.
    documents=[tip['tip'] for tip in sustainability_tips],
    metadatas=[
        {
            'category': tip['category'],
            'activity': tip['activity'],
            'emission_level': tip['emission_level'],
            'impact': tip['impact'],
        }
        for tip in sustainability_tips
    ],
)

print(f'Added {len(sustainability_tips)} tips to vector store')

## Step 5: Save All Models and Encoders

In [None]:
# Serialise the trained model and both fitted label encoders with joblib
artifacts = {
    'co2_prediction_model.pkl': model,
    'label_encoder_activity.pkl': label_encoder_activity,
    'label_encoder_category.pkl': label_encoder_category,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

# Distinct (Activity, Category) pairs from the raw data, one dict per pair
unique_pairs = df[['Activity', 'Category']].drop_duplicates()
activity_category_mapping = unique_pairs.to_dict(orient='records')
with open('activity_category_mapping.json', 'w') as mapping_file:
    mapping_file.write(json.dumps(activity_category_mapping, indent=2))

print('All models saved successfully!')

## Step 6: Test the Model

In [None]:
# Example input. Both values must be labels the encoders saw during fitting;
# LabelEncoder.transform raises ValueError for an unseen label.
test_activity = 'Car(20km)'
test_category = 'Transport'

activity_encoded = label_encoder_activity.transform([test_activity])[0]
category_encoded = label_encoder_category.transform([test_category])[0]

# The model was fitted on a DataFrame, so it recorded its column names.
# Predicting from a bare nested list triggers scikit-learn's
# "X does not have valid feature names" warning; pass a one-row DataFrame
# that carries the same column names instead.
test_features = pd.DataFrame(
    [[activity_encoded, category_encoded]],
    columns=list(model.feature_names_in_),
)
prediction = model.predict(test_features)[0]

print(f'Activity: {test_activity}')
print(f'Category: {test_category}')
print(f'Predicted CO2 Emission: {prediction:.2f} kg')

## Step 7: Download All Files

In [None]:
# Trigger a browser download for every file written by the earlier cells
for artifact_name in (
    'co2_prediction_model.pkl',
    'label_encoder_activity.pkl',
    'label_encoder_category.pkl',
    'sustainability_tips.json',
    'activity_category_mapping.json',
):
    files.download(artifact_name)

print('All files ready for download!')