In [3]:
# --- 1️⃣ Annual CO₂ Emission file ---
uploaded = files.upload()          # choose annual carbon emission data.csv
df_co2 = pd.read_csv("annual carbon emission data.csv")
print("✅ CO2 file loaded:", df_co2.shape)

Saving annual carbon emission data.csv to annual carbon emission data (2).csv
✅ CO2 file loaded: (1386, 4)


In [4]:
# --- 2️⃣ Population file ---
uploaded = files.upload()          # choose population.csv
df_pop = pd.read_csv("population.csv")
print("✅ Population file loaded:", df_pop.shape)

Saving population.csv to population.csv
✅ Population file loaded: (18944, 3)


In [5]:
# --- 3️⃣ Energy use per person file ---
uploaded = files.upload()          # choose energy-use-per-person.csv
df_energy = pd.read_csv("energy-use-per-person.csv")
print("✅ Energy file loaded:", df_energy.shape)

Saving energy-use-per-person.csv to energy-use-per-person.csv
✅ Energy file loaded: (11086, 3)


In [6]:
# --- 4️⃣ Fertilizer vs GDP file ---
uploaded = files.upload()          # choose fertilizer-consumption-per-hectare-vs-gdp-per-capita.csv
df_fert = pd.read_csv("fertilizer-consumption-per-hectare-vs-gdp-per-capita.csv")
print("✅ Fertilizer file loaded:", df_fert.shape)

Saving fertilizer-consumption-per-hectare-vs-gdp-per-capita.csv to fertilizer-consumption-per-hectare-vs-gdp-per-capita.csv
✅ Fertilizer file loaded: (13920, 6)


In [7]:
# Rename common columns for consistency
df_co2 = df_co2.rename(columns={'Entity': 'Country', 'Year': 'Year', df_co2.columns[-1]: 'CO2_Emissions'})
df_pop = df_pop.rename(columns={'Entity': 'Country', 'Year': 'Year', df_pop.columns[-1]: 'Population'})
df_energy = df_energy.rename(columns={'Entity': 'Country', 'Year': 'Year', df_energy.columns[-1]: 'Energy_Use_per_person'})
df_fert = df_fert.rename(columns={'Entity': 'Country', 'Year': 'Year', df_fert.columns[-1]: 'Fertilizer_vs_GDP'})

print("✅ Columns standardized successfully!")


✅ Columns standardized successfully!


In [8]:
# Merge step-by-step on Country and Year
merged = df_co2.merge(df_pop, on=['Country','Year'], how='left')
merged = merged.merge(df_energy, on=['Country','Year'], how='left')
merged = merged.merge(df_fert, on=['Country','Year'], how='left')

print("✅ Merged dataset shape:", merged.shape)
merged.head()


✅ Merged dataset shape: (1386, 10)


Unnamed: 0,Country,Code_x,Year,CO2_Emissions,Population,Energy_Use_per_person,Code_y,Nutrient nitrogen N (total) | 00003102 || Use per area of cropland | 005159 || kilograms per hectare,"GDP per capita, PPP (constant 2021 international $)",Fertilizer_vs_GDP
0,Brazil,BRA,1856,227193.0,,,,,,
1,Brazil,BRA,1857,269048.0,,,,,,
2,Brazil,BRA,1858,284547.0,,,,,,
3,Brazil,BRA,1859,372714.0,,,,,,
4,Brazil,BRA,1860,434898.0,,,,,,


In [9]:
# Convert numeric columns and fill missing values
for col in ['Population', 'Energy_Use_per_person', 'Fertilizer_vs_GDP', 'CO2_Emissions']:
    merged[col] = pd.to_numeric(merged[col], errors='coerce')

merged = merged.dropna(subset=['CO2_Emissions'])
merged.fillna(merged.median(numeric_only=True), inplace=True)

print("✅ Data cleaned successfully!")
print("Shape after cleaning:", merged.shape)
merged.head()


✅ Data cleaned successfully!
Shape after cleaning: (1386, 10)


Unnamed: 0,Country,Code_x,Year,CO2_Emissions,Population,Energy_Use_per_person,Code_y,Nutrient nitrogen N (total) | 00003102 || Use per area of cropland | 005159 || kilograms per hectare,"GDP per capita, PPP (constant 2021 international $)",Fertilizer_vs_GDP
0,Brazil,BRA,1856,227193.0,139720658.5,39086.637,,85.595,41736.702,
1,Brazil,BRA,1857,269048.0,139720658.5,39086.637,,85.595,41736.702,
2,Brazil,BRA,1858,284547.0,139720658.5,39086.637,,85.595,41736.702,
3,Brazil,BRA,1859,372714.0,139720658.5,39086.637,,85.595,41736.702,
4,Brazil,BRA,1860,434898.0,139720658.5,39086.637,,85.595,41736.702,


In [10]:
X = merged[['Year', 'Population', 'Energy_Use_per_person', 'Fertilizer_vs_GDP']].values
y = merged['CO2_Emissions'].values


In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train
xgb = XGBRegressor(
    n_estimators=800,
    learning_rate=0.03,
    max_depth=7,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
    objective='reg:squarederror'
)
xgb.fit(X_train_scaled, y_train)

# Evaluate
y_pred = xgb.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"✅ Model trained successfully!")
print(f"R² Score: {r2:.4f}  → Accuracy = {r2*100:.2f}%")
print(f"MSE: {mse:.4f}")


  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


✅ Model trained successfully!
R² Score: 0.9451  → Accuracy = 94.51%
MSE: 129627179587313168.0000


In [12]:
import joblib

joblib.dump(xgb, "carbon_emission_model.pkl")
joblib.dump(scaler, "scaler.pkl")
merged.to_csv("final_carbon_emission_dataset.csv", index=False)

from google.colab import files
files.download("carbon_emission_model.pkl")
files.download("final_carbon_emission_dataset.csv")

print("✅ Files saved and ready for GitHub!")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Files saved and ready for GitHub!


In [13]:
!pip install gradio -q
import gradio as gr
import numpy as np
import joblib

model = joblib.load("carbon_emission_model.pkl")
scaler = joblib.load("scaler.pkl")

def predict_emission(year, population, energy, fertilizer):
    X_new = np.array([[year, population, energy, fertilizer]])
    X_scaled = scaler.transform(X_new)
    pred = model.predict(X_scaled)[0]
    return f"Predicted CO₂ emissions for {int(year)}: {pred:,.2f} tonnes"

iface = gr.Interface(
    fn=predict_emission,
    inputs=[
        gr.Number(label="Year"),
        gr.Number(label="Population"),
        gr.Number(label="Energy use per person"),
        gr.Number(label="Fertilizer vs GDP"),
    ],
    outputs="text",
    title="CO₂ Emission Prediction Chatbot",
    description="Enter year, population, energy use, and fertilizer vs GDP to predict carbon emissions."
)
iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7cffa771865137ed9c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


