In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error
import joblib

# Load the dataset
data = pd.read_csv(r"C:\Users\ashwini\Downloads\flask-login-master\flask-login-master\Carbon Emission.csv")

# Handle missing values
data.fillna(data.mean(numeric_only=True), inplace=True)  # Fill numeric missing values with mean
for col in data.select_dtypes(include='object').columns:
    data[col].fillna(data[col].mode()[0], inplace=True)  # Fill categorical missing values with mode

# Map non-numeric values to numeric equivalents
mappings = {
    'Social Activity': {'often': 3, 'sometimes': 2, 'rarely': 1, 'never': 0},
    'Frequency of Traveling by Air': {'very frequently': 4, 'frequently': 3, 'rarely': 2, 'never': 1},
    'Waste Bag Size': {'small': 1, 'medium': 2, 'large': 3, 'extra large': 4},
    'How Often Shower': {'daily': 1, 'weekly': 7, 'monthly': 30},
    'Energy efficiency': {'No': 0, 'Sometimes': 1, 'Yes': 2}  # Added mapping for 'Energy efficiency'
}

# Apply mappings
for column, mapping in mappings.items():
    if column in data.columns:
        data[column] = data[column].map(mapping)

# Drop 'Recycling' and 'Cooking_With' columns
data = data.drop(['Recycling', 'Cooking_With'], axis=1)

# Encode remaining categorical columns using LabelEncoder
categorical_columns = ['Body Type', 'Sex', 'Diet', 'Transport', 'Vehicle Type', 'Heating Energy Source']
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col].astype(str))  # Convert to string before encoding
    label_encoders[col] = le

# Verify all columns are numeric
non_numeric_cols = data.dtypes[data.dtypes.apply(lambda x: x not in ['int64', 'float64'])].index
if non_numeric_cols.any():
    print("Non-numeric columns after preprocessing:", non_numeric_cols)
    raise ValueError("Some columns are still non-numeric. Please review the preprocessing steps.")

# Scale numerical columns
numerical_columns = ['Monthly Grocery Bill', 'Vehicle Monthly Distance Km', 'Waste Bag Weekly Count', 
                     'How Long TV PC Daily Hour', 'How Many New Clothes Monthly', 'How Long Internet Daily Hour']
scaler = StandardScaler()
data[numerical_columns] = scaler.fit_transform(data[numerical_columns])

# Split data into features (X) and target (y)
X = data.drop('CarbonEmission', axis=1)
y = data['CarbonEmission']

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Regressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred, squared=False)
print(f"RMSE: {rmse}")










model.save("carbon_model.pkl")



# Save the trained model and preprocessing objects
joblib.dump(model, "carbon_footprint_model.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")
joblib.dump(scaler, "scaler.pkl")

print("Model and preprocessors saved successfully.")






#--------------------------------------------

# Load the trained model, scaler, and label encoders
model = joblib.load('carbon_footprint_model.pkl')
label_encoders = joblib.load('label_encoders.pkl')
scaler = joblib.load('scaler.pkl')

# Sample input data without 'Recycling' and 'Cooking_With' columns
input_data = {
    'Body Type': ['overweight'],
    'Sex': ['female'],
    'Diet': ['pescatarian'],
    'How Often Shower': ['daily'],
    'Heating Energy Source': ['coal'],
    'Transport': ['public'],
    'Vehicle Type': ['petrol'],
    'Social Activity': ['often'],
    'Monthly Grocery Bill': [230],
    'Frequency of Traveling by Air': ['frequently'],
    'Vehicle Monthly Distance Km': [210],
    'Waste Bag Size': ['large'],
    'Waste Bag Weekly Count': [4],
    'How Long TV PC Daily Hour': [7],
    'How Many New Clothes Monthly': [26],
    'How Long Internet Daily Hour': [1],
    'Energy efficiency': ['No']
}

# Convert the input data to a DataFrame
input_df = pd.DataFrame(input_data)

# Apply the same mappings for non-numeric columns
for column, mapping in mappings.items():
    if column in input_df.columns:
        input_df[column] = input_df[column].map(mapping)

# Encode categorical columns using the saved label encoders
for col, le in label_encoders.items():
    input_df[col] = le.transform(input_df[col].astype(str))

# Scale numerical columns using the saved scaler
numerical_columns = ['Monthly Grocery Bill', 'Vehicle Monthly Distance Km', 'Waste Bag Weekly Count',
                     'How Long TV PC Daily Hour', 'How Many New Clothes Monthly', 'How Long Internet Daily Hour']
input_df[numerical_columns] = scaler.transform(input_df[numerical_columns])

# Predict the carbon emission
predicted_emission = model.predict(input_df)

# Output the predicted carbon emission
print(f"Predicted Carbon Emission: {predicted_emission[0]}")




Non-numeric columns after preprocessing: Index(['Body Type', 'Sex', 'Diet', 'Heating Energy Source', 'Transport',
       'Vehicle Type'],
      dtype='object')


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mode()[0], inplace=True)  # Fill categorical missing values with mode


ValueError: Some columns are still non-numeric. Please review the preprocessing steps.

In [20]:
import pandas as pd
import joblib

# Load the trained model, scaler, and label encoders
model = joblib.load('carbon_footprint_model.pkl')
label_encoders = joblib.load('label_encoders.pkl')
scaler = joblib.load('scaler.pkl')

# Input data (use the same values as provided)
input_data = {
    'Body Type': ['overweight'],  # Input your value
    'Sex': ['female'],            # Input your value
    'Diet': ['pescatarian'],      # Input your value
    'How Often Shower': ['daily'],  # Input your value
    'Heating Energy Source': ['coal'],  # Input your value
    'Transport': ['public'],         # Input your value
    'Vehicle Type': ['petrol'],          # Input your value or None if not applicable
    'Social Activity': ['often'],    # Input your value
    'Monthly Grocery Bill': [230],   # Input your value
    'Frequency of Traveling by Air': ['frequently'],  # Input your value
    'Vehicle Monthly Distance Km': [210],  # Input your value
    'Waste Bag Size': ['large'],     # Input your value
    'Waste Bag Weekly Count': [4],   # Input your value
    'How Long TV PC Daily Hour': [7],  # Input your value
    'How Many New Clothes Monthly': [26],  # Input your value
    'How Long Internet Daily Hour': [1],  # Input your value
    'Energy efficiency': ['No'],     # Input your value
    'Recycling': [['Metal']],         # Input your value
    'Cooking_With': [['Stove', 'Oven']]  # Input your value
}

# Convert the input data to a DataFrame
input_df = pd.DataFrame(input_data)

# Apply the same mappings for non-numeric columns as in the training phase
mappings = {
    'Social Activity': {'often': 3, 'sometimes': 2, 'rarely': 1, 'never': 0},
    'Frequency of Traveling by Air': {'very frequently': 4, 'frequently': 3, 'rarely': 2, 'never': 1},
    'Waste Bag Size': {'small': 1, 'medium': 2, 'large': 3, 'extra large': 4},
    'How Often Shower': {'daily': 1, 'weekly': 7, 'monthly': 30},
    'Energy efficiency': {'No': 0, 'Sometimes': 1, 'Yes': 2}
}

# Apply mappings
for column, mapping in mappings.items():
    if column in input_df.columns:
        input_df[column] = input_df[column].map(mapping)

# Encode categorical columns using the saved label encoders
for col, le in label_encoders.items():
    input_df[col] = le.transform(input_df[col].astype(str))

# Scale numerical columns using the saved scaler
numerical_columns = ['Monthly Grocery Bill', 'Vehicle Monthly Distance Km', 'Waste Bag Weekly Count',
                     'How Long TV PC Daily Hour', 'How Many New Clothes Monthly', 'How Long Internet Daily Hour']
input_df[numerical_columns] = scaler.transform(input_df[numerical_columns])

# Predict the carbon emission
predicted_emission = model.predict(input_df)

# Output the predicted carbon emission
print(f"Predicted Carbon Emission: {predicted_emission[0]}")


Predicted Carbon Emission: 2361.67


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error
import joblib
import os

# Load the dataset
file_path = r"C:\Users\ashwini\Downloads\flask-login-master\flask-login-master\Carbon Emission.csv"
data = pd.read_csv(file_path)
print("Dataset loaded successfully.")

# Handle missing values
data.fillna(data.mean(numeric_only=True), inplace=True)  # Fill numeric missing values with mean
for col in data.select_dtypes(include='object').columns:
    data[col].fillna(data[col].mode()[0], inplace=True)  # Fill categorical missing values with mode
print("Missing values handled.")

# Map non-numeric values to numeric equivalents
mappings = {
    'Social Activity': {'often': 3, 'sometimes': 2, 'rarely': 1, 'never': 0},
    'Frequency of Traveling by Air': {'very frequently': 4, 'frequently': 3, 'rarely': 2, 'never': 1},
    'Waste Bag Size': {'small': 1, 'medium': 2, 'large': 3, 'extra large': 4},
    'How Often Shower': {'daily': 1, 'weekly': 7, 'monthly': 30},
    'Energy efficiency': {'No': 0, 'Sometimes': 1, 'Yes': 2}
}
for column, mapping in mappings.items():
    if column in data.columns:
        data[column] = data[column].map(mapping)
print("Non-numeric columns mapped to numeric equivalents.")

# Drop unused columns
columns_to_drop = ['Recycling', 'Cooking_With']
data.drop(columns=columns_to_drop, axis=1, inplace=True, errors='ignore')
print(f"Columns {columns_to_drop} dropped.")

# Encode categorical columns
categorical_columns = ['Body Type', 'Sex', 'Diet', 'Transport', 'Vehicle Type', 'Heating Energy Source']
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col].astype(str))  # Convert to string before encoding
    label_encoders[col] = le
print("Categorical columns encoded.")

# Ensure all columns are numeric
non_numeric_cols = data.dtypes[data.dtypes.apply(lambda x: x not in ['int64', 'float64'])].index
if non_numeric_cols.any():
    print("Non-numeric columns after preprocessing:", non_numeric_cols)
    raise ValueError("Some columns are still non-numeric. Please review the preprocessing steps.")

# Scale numerical columns
numerical_columns = ['Monthly Grocery Bill', 'Vehicle Monthly Distance Km', 'Waste Bag Weekly Count',
                     'How Long TV PC Daily Hour', 'How Many New Clothes Monthly', 'How Long Internet Daily Hour']
scaler = StandardScaler()
data[numerical_columns] = scaler.fit_transform(data[numerical_columns])
print("Numerical columns scaled.")

# Split data into features (X) and target (y)
X = data.drop('CarbonEmission', axis=1)
y = data['CarbonEmission']

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Dataset split into training and testing sets.")

# Train a Random Forest Regressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
print("Model training complete.")

# Evaluate the model
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred, squared=False)
print(f"RMSE: {rmse}")

# Save the trained model and preprocessing objects
model_dir = "model"
os.makedirs(model_dir, exist_ok=True)
joblib.dump(model, os.path.join(model_dir, "carbon_footprint_model.pkl"))
joblib.dump(label_encoders, os.path.join(model_dir, "label_encoders.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "scaler.pkl"))
print("Model and preprocessors saved successfully.")


Dataset loaded successfully.
Missing values handled.
Non-numeric columns mapped to numeric equivalents.
Columns ['Recycling', 'Cooking_With'] dropped.
Categorical columns encoded.
Numerical columns scaled.
Dataset split into training and testing sets.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mode()[0], inplace=True)  # Fill categorical missing values with mode


Model training complete.
RMSE: 302.6941687057252
Model and preprocessors saved successfully.


