In [11]:


import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


df = pd.read_csv("/home/ruth/CLIMATE CHANGE/climatedata.csv")
df

Unnamed: 0,PARAMETER,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANN,County
0,T2M_MAX,2000,30.02,32.38,31.90,31.10,28.84,28.08,26.02,27.33,26.16,27.08,27.54,28.51,32.38,BARINGO
1,T2M_MAX,2001,29.05,30.90,30.94,30.33,27.89,25.70,25.09,26.19,26.71,28.21,25.25,27.65,30.94,BARINGO
2,T2M_MAX,2002,28.51,31.08,30.98,31.13,26.16,27.12,27.49,26.93,30.65,28.60,27.75,28.12,31.13,BARINGO
3,T2M_MAX,2003,29.45,31.25,32.10,31.62,24.94,24.73,23.78,24.19,26.25,27.38,26.87,27.34,32.10,BARINGO
4,T2M_MAX,2004,29.13,30.77,31.97,28.73,26.62,26.55,26.98,27.36,29.05,30.18,26.73,27.92,31.97,BARINGO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3193,PRECTOTCORR,2018,0.00,0.00,5.27,10.55,10.55,5.27,5.27,5.27,0.00,5.27,0.00,0.00,5.27,WEST POKOT
3194,PRECTOTCORR,2019,0.00,0.00,0.00,0.00,5.27,5.27,5.27,5.27,5.27,5.27,5.27,5.27,5.27,WEST POKOT
3195,PRECTOTCORR,2020,5.27,0.00,5.27,10.55,5.27,5.27,5.27,10.55,10.55,5.27,5.27,0.00,5.27,WEST POKOT
3196,PRECTOTCORR,2021,0.85,0.75,0.68,4.00,7.01,0.77,3.68,2.18,4.58,3.71,1.52,1.04,2.58,WEST POKOT


In [12]:
# filter only PRECIPITATION data drop PARAMETER, and ANN cols
data = df[df['PARAMETER'] == 'PRECTOTCORR'].drop(columns=['PARAMETER', 'ANN']).reset_index(drop=True)
data.shape

(1066, 14)

In [13]:
data.head(10)

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,County
0,2000,0.0,0.0,0.0,5.27,5.27,0.0,5.27,5.27,5.27,5.27,0.0,0.0,BARINGO
1,2001,0.0,0.0,0.0,5.27,5.27,5.27,5.27,5.27,5.27,5.27,5.27,0.0,BARINGO
2,2002,0.0,0.0,0.0,5.27,5.27,0.0,0.0,5.27,0.0,5.27,5.27,5.27,BARINGO
3,2003,0.0,0.0,0.0,10.55,5.27,5.27,5.27,5.27,0.0,5.27,0.0,0.0,BARINGO
4,2004,0.0,0.0,0.0,10.55,5.27,0.0,0.0,5.27,0.0,5.27,5.27,0.0,BARINGO
5,2005,0.0,0.0,0.0,5.27,5.27,0.0,0.0,5.27,5.27,5.27,0.0,0.0,BARINGO
6,2006,0.0,0.0,0.0,5.27,5.27,0.0,0.0,5.27,5.27,5.27,10.55,5.27,BARINGO
7,2007,0.0,5.27,0.0,5.27,5.27,0.0,5.27,5.27,5.27,5.27,0.0,0.0,BARINGO
8,2008,0.0,0.0,5.27,5.27,5.27,0.0,0.0,5.27,0.0,5.27,5.27,0.0,BARINGO
9,2009,0.0,0.0,0.0,5.27,5.27,0.0,5.27,0.0,0.0,5.27,0.0,5.27,BARINGO


In [14]:
# Add index col, will help with where to base the future predictions
data['index'] = [i for i in range(0, data.shape[0])]

In [15]:
# Training a linear regression model
X = data[['index', 'YEAR']]
y = data[['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']]

# Split the dataset into training and testing (80:20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a model object
model = LinearRegression()

# Fit the model to the data (training)
model.fit(X_train, y_train)

In [16]:
# Function to predict rainfall for a specific county and month
def predict_rainfall_for_county_and_month(model, county, month):
    # Get the latest index in the dataset
    latest_index = data['index'].max()

    # Map month names to numerical values
    month_mapping = {'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6,
                     'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12}

    # Calculate the future index and year based on user inputs
    future_index = latest_index + 1
    future_year = 2023  

    # Create the future data point for prediction
    future_data = np.array([[future_index, future_year]])

    # Make the prediction for the specified month
    predicted_rainfall = model.predict(future_data)[0, month_mapping[month] - 1]

    return predicted_rainfall
# county = 'Nairobi'
# month = 'March'  

# predicted_rainfall = predict_rainfall_for_county_and_month(model, county, month)
# print(f"Predicted rainfall for {county}, {month}: {predicted_rainfall} mm")


In [17]:
# Evalate model performance on test data
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Absolute Error: {mae:.2f}')
print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

Mean Absolute Error: 2.36
Mean Squared Error: 8.91
R-squared: 0.01


In [18]:
# Function to predict rainfall in the future
def predict_future_months(model, base_index, base_year, num_months):
    predictions = []

    for i in range(num_months):
        future_index = base_index + i
        future_year = base_year + (future_index // 12)
        future_month = future_index % 12

        future_data = np.array([[future_index, future_year]])
        future_prediction = model.predict(future_data).reshape(-1)
        predictions.append(future_prediction[future_month])

    return predictions

# Predicting rainfall in future
base_index = data.shape[0]  # Use the last index in the dataset
base_year = 2023  # Use the last year in the dataset
num_months = 3  # Predict 3 months into the future

predictions = predict_future_months(model, base_index, base_year, num_months)
print(f"The predictions for the next {num_months} months are:", predictions)


The predictions for the next 3 months are: [12.906828757554706, 10.391156211476584, 0.4199688694159489]


