Climate Change Modelling

1. Libraries and Reading the file

In [1]:
# Import the libraries used for data handling, modelling, evaluation and plotting

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt
import xgboost as xgb
import seaborn as sns

In [2]:
# Import the dataset
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook only runs where this exact file exists.
DATA_PATH = r"D:\climate_change_data.csv\climate_change_data.csv"

data = pd.read_csv(DATA_PATH)

2. Dataset Info.

In [None]:
# Display the first rows of the loaded DataFrame.
data.head()

In [None]:
# Summary statistics from describe(), transposed so each described column is a row.
data.describe().T

In [None]:
# Print the DataFrame summary produced by DataFrame.info().
data.info()

Data Preprocessing

1. Data Cleaning

In [None]:
# Count the null values in each column of the dataset

data.isnull().sum()

# Note: if any column reports nulls, fill them (e.g. with DataFrame.fillna) before modelling

2. Data Normalization

In [7]:
# Rescale 'Temperature' into a new 'normal' column: (value - low) / (high - low).
# NOTE(review): the bounds are hard-coded; presumably they are the min/max of
# 'Temperature' as reported by describe() — confirm against the dataset.
TEMP_LOW = -3.803589
TEMP_HIGH = 33.976956

data['normal'] = (data['Temperature'].values - TEMP_LOW) / (TEMP_HIGH - TEMP_LOW)

In [8]:
# Replace the 'Date' column with its month number.
# The dtype guard keeps this cell idempotent: once 'Date' already holds month
# numbers, running pd.to_datetime on those integers again would read them as
# nanosecond offsets from 1970-01-01 and silently turn every row into month 1.
if not pd.api.types.is_numeric_dtype(data['Date']):
    data['Date'] = pd.to_datetime(data['Date']).dt.month

In [None]:
# List the column labels currently present in the DataFrame.
data.columns

In [10]:
# Select the 'Country' and 'Location' columns that map_column_to_numbers encodes below.
df = data[['Country', 'Location']]

# Function to map strings to numbers for a given column
def map_column_to_numbers(column):
    """Encode each distinct value of ``column`` as an integer frequency rank.

    Codes start at 1 and follow the order of ``column.value_counts()``, so the
    most frequent value receives 1, the next most frequent 2, and so on.

    Parameters
    ----------
    column : pandas.Series
        Column whose values should be replaced by integer codes.

    Returns
    -------
    pandas.Series
        Series with the same index as ``column`` holding the code of each
        value. Values that ``value_counts()`` does not count (missing values)
        have no code and come back as NaN.
    """
    # Every distinct value gets its own code regardless of how often it
    # occurs, so the code is simply the 1-based position in the ranking.
    ranked_values = column.value_counts().index
    value_map = {value: code for code, value in enumerate(ranked_values, start=1)}

    return column.map(value_map)

# Encode both columns, storing each result in a new '<column> Number' column
for source_name in ('Country', 'Location'):
    data[source_name + ' Number'] = map_column_to_numbers(df[source_name])

In [12]:
# Features (x) and label (y)

# The label is the rescaled temperature stored in the 'normal' column.
TARGET_COLUMN = 'normal'

# Features: every remaining column except
#   - the target itself,
#   - 'Temperature', because 'normal' is a direct linear rescaling of it and
#     keeping it would leak the answer into the inputs,
#   - 'Location' and 'Country', whose encoded versions live in the
#     'Location Number' / 'Country Number' columns.
x = data.drop([TARGET_COLUMN, 'Temperature', 'Location', 'Country'], axis=1)

# Label: only the target column. Listing the feature columns here as well
# would make the models learn to reproduce their own inputs and would make the
# evaluation metrics meaningless.
y = data[TARGET_COLUMN]

Machine Learning

1. Training and Testing

In [13]:
# Split the data into training and testing sets:
# 20% of the rows are held out for testing, and random_state=42 fixes the
# shuffle so the same split is produced on every run.

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

2. Standardize the Features

In [14]:
# Standardize the features.
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to the test split.
# Note: x_train and x_test are rebound to the scaler's output from here on.

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

3. Train the Model

In [17]:
# Train diffrent types the model

def Machine_Learning(x_train, y_train, choice) :
    match choice :
        case 1 :
            print("Random Forest Model : ")
            model = RandomForestRegressor()
            return model.fit(x_train, y_train)
        
        case 2 :
            print("Linear Regression Model : ")
            model = LinearRegression()
            return model.fit(x_train, y_train)

        case 3 :
            print("Decision Tree Model : ")
            model = DecisionTreeRegressor()
            return model(x_train, y_train)
        
        case 4 :
            print("Gradient Boosting Machines Model : ")
            model = xgb.XGBClassifier(objective='multi:softmax', num_class=3, colsample_bytree=0.3, learning_rate=0.1, max_depth=5, n_estimators=100)
            return model(x_train, y_train)
        
        case _ :
            print("No more Machine Learning Model are available")

4. Make Predictions

In [None]:
# Make predictions

print("Enter 1 for Random Forest Model\nEnter 2 for Linear Regression Model\nEnter 3 for Decision Tree Model\nEnter 4 for Gradient Boosting Machine Model\n" )

choice = int(input("Enter : "))

obj = Machine_Learning(x_train, y_train, choice)

# Machine_Learning returns None for an unrecognised choice; stop here with a
# clear message instead of failing later on `None.predict`.
if obj is None:
    raise ValueError(f"Unsupported model choice: {choice!r}; expected 1, 2, 3 or 4.")

y_pred = obj.predict(x_test)

5. Evaluating the Model 

In [19]:
# Evaluate the model by comparing the held-out targets (y_test) with the predictions (y_pred)

mae = mean_absolute_error(y_test, y_pred)  # mean absolute error
mse = mean_squared_error(y_test, y_pred)  # mean squared error
r2 = r2_score(y_test, y_pred)  # R2 score

In [None]:
# Report each evaluation metric on its own line

report_lines = [
    f'Mean Absolute Error: {mae}',
    f'Mean Squared Error: {mse}',
    f'R2 Score: {r2}',
]
print('\n'.join(report_lines))

Data Visualization

In [None]:
# Scatter plot of 'Temperature' against 'CO2 Emissions' (no regression line is drawn)
fig, ax = plt.subplots(figsize=(10, 6))

sns.scatterplot(
    x='CO2 Emissions',
    y='Temperature',
    data=data,
    color='blue',
    label='Actual Data',
    ax=ax,
)

ax.set_title('Global Temperature vs. CO2 Levels')
ax.set_xlabel('CO2 Levels (ppm)')
ax.set_ylabel('Temperature (°C)')
ax.legend()
ax.grid(True)
plt.show()

Predict the Value

In [None]:
# Predict for a range of candidate inputs with the model held in `obj`
# (whichever model was selected, not necessarily linear regression).

print("Enter the values for future prediction : ")

# Build all ten candidate rows at once as a (10, 1) array: one row per value
# 0..9 and a single feature column, so every value is predicted rather than
# only the last one.
# NOTE(review): predict() requires the same number of feature columns the
# model was fitted with, passed through the same scaler. If the model was
# trained on several features, decide which feature these values represent
# and fill in the remaining columns before predicting.
x_future = np.arange(0, 10).reshape(-1, 1)

# Predict future values
y_future = obj.predict(x_future)

print("Future predictions : ", y_future)

In [None]:
# Define scenarios

current_co2 = int(input("Enter the value of CO2 : "))
scenarios = {
    'Current CO2': current_co2,
    '5% Increase': current_co2 * 1.05,
    '10% Increase': current_co2 * 1.10,
    '15% Increase': current_co2 * 1.15,
    '20% Increase': current_co2 * 1.20,
    '25% Increase': current_co2 * 1.25,
    '30% Increase': current_co2 * 1.30,
    '35% Increase': current_co2 * 1.35,
    '40% Increase': current_co2 * 1.40
}

# Make predictions for each scenario.
# The model was fitted on every column of `x` after standardization by
# `scaler`, so a lone CO2 value is not a valid input row. Each scenario
# therefore starts from the dataset's mean value of every feature, overrides
# only 'CO2 Emissions', and goes through the same scaler before predicting.
# NOTE(review): holding the other features at their mean is an assumption;
# choose a different baseline row if a specific place or month is intended.
baseline = x.mean()
scenario_rows = pd.DataFrame(
    {column: [baseline[column]] * len(scenarios) for column in x.columns},
    index=list(scenarios),
)
scenario_rows['CO2 Emissions'] = list(scenarios.values())

scenario_outputs = obj.predict(scaler.transform(scenario_rows))

# One prediction per scenario, keyed by the scenario name
predictions = dict(zip(scenarios, scenario_outputs))

print(predictions)