In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

In [2]:
# Load datasets
temps = pd.read_csv("DataSet/Annual_Surface_Temperature_Change.csv")
emissions = pd.read_csv("DataSet/Emissions_Country_india.csv")

In [3]:
# Filter data for India
indian_temps = temps.query("Country == 'India'")
indian_emissions = emissions.query("Area == 'India'")

In [4]:
# Reshape temperature data
temperature_data = indian_temps.drop(columns=['ObjectId', 'ISO2', 'ISO3', 'Indicator', 'Code', 'Unit', 'Source']).set_index('Country').stack().reset_index()
temperature_data.columns = ['Area Code', 'Year', 'Temperature change (Celsius)']

In [5]:
# Print the column names to check if 'Year' is present
print(temperature_data.columns)

Index(['Area Code', 'Year', 'Temperature change (Celsius)'], dtype='object')


In [6]:
# Drop duplicate rows based on 'Year'
temperature_data = temperature_data.drop_duplicates(subset=['Year'])

In [7]:
# Remove 'F' prefix from the 'Year' column
temperature_data['Year'] = temperature_data['Year'].str.replace('F', '').astype(int)

In [8]:


if 'Year' in temperature_data.columns:
    # Set index on emission data
    emissions_data = indian_emissions.set_index('Year')

    # Merge datasets
    merged_data = pd.merge(temperature_data, emissions_data, how='inner', on='Year')

    # Filter for Methane emissions from Rice Production
    methane_emissions = merged_data[merged_data['Element'] == 'Emissions (CH4)']
    methane_emissions = methane_emissions.rename(columns={'Value': 'CH4_Emissions'})

    # Correlation analysis
    correlation = methane_emissions['CH4_Emissions'].corr(methane_emissions['Temperature change (Celsius)'])
    print("Correlation between Methane emissions from Rice Production and Temperature change:", correlation)

    # Prepare data for regression
    X = methane_emissions[['CH4_Emissions']]
    y = methane_emissions['Temperature change (Celsius)']

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

    # Train regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict temperature change for the next 10 years
    future_years = np.arange(2022, 2032).reshape(-1, 1)
    predicted_temps = model.predict(future_years)

    # Evaluate model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    print(predicted_temps)
else:
    print("'Year' column not found in the DataFrame.")


Correlation between Methane emissions from Rice Production and Temperature change: -0.07417186975542545
Mean Squared Error: 0.17059303849539276
[0.2888863  0.28886239 0.28883848 0.28881456 0.28879065 0.28876674
 0.28874282 0.28871891 0.288695   0.28867108]


