In [8]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from prophet import Prophet

### Loading datasets

In [9]:
def load_data(file_path1, file_path2):
    """Read two CSV files and return them as a pair of DataFrames.

    Parameters
    ----------
    file_path1 : path to the first CSV (the "all countries global temperature" data).
    file_path2 : path to the second CSV (the "Average Surface Temperature" data).

    Returns
    -------
    tuple of (DataFrame, DataFrame), in the same order as the arguments.
    """
    frames = [pd.read_csv(csv_path) for csv_path in (file_path1, file_path2)]
    return frames[0], frames[1]

# Folder that holds the raw CSV files. The default is the original author's
# local folder, so behaviour is unchanged on that machine; set the
# CLIMATE_DATA_DIR environment variable to run the notebook anywhere else.
DATA_DIR = os.environ.get(
    "CLIMATE_DATA_DIR",
    r"C:\Users\chris.mutuku\OneDrive - Skanem AS\Desktop\CAPSTONE PROJECT\Capstone-Project---Group-8\Data\Climate Change - datasets",
)
file_path1 = os.path.join(DATA_DIR, "all countries global temperature.csv")
file_path2 = os.path.join(DATA_DIR, "Average Surface Temperature.csv")

# df1: all-countries global temperature, df2: average surface temperature
df1, df2 = load_data(file_path1, file_path2)

In [10]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

def predict_temperatures(country, target_year):
    """
    Predict temperature for a given country and year (2022-2030).

    Reads the precomputed predictions from
    "average_surface_temperature_predictions_2022_2030.csv" (relative to the
    working directory), builds the country's yearly historical averages from
    the module-level ``df2`` frame, and draws both series on a new figure.

    Parameters
    ----------
    country : name matched against the 'Country Name' column of the
        predictions file.
    target_year : year to look up in the predictions; a falsy value
        (``None`` / ``0``) skips the lookup.

    Returns
    -------
    (historical, predicted, message)
        ``historical`` is a Series of mean temperature indexed by year,
        ``predicted`` is a one-column ('Predicted') DataFrame indexed by the
        prediction file's column labels, and ``message`` is a status string
        or ``None``. When the country is missing from the predictions file
        the result is ``(None, None, "Data not available for ...")``.
    """
    # Load and prepare data
    df = pd.read_csv("average_surface_temperature_predictions_2022_2030.csv")

    # Filter for selected country
    country_data = df[df['Country Name'] == country]

    if country_data.empty:
        return None, None, f"Data not available for {country}"

    # The historical frame is keyed by 'Entity' elsewhere in this notebook, so
    # only use 'Country Name' when that column really exists in df2.
    name_col = 'Country Name' if 'Country Name' in df2.columns else 'Entity'
    history = df2[df2[name_col] == country]

    # The notebook derives 'year' from 'Day' in a later cell; do the same here
    # when the column has not been created yet.
    if 'year' in history.columns:
        year_key = history['year']
    else:
        year_key = pd.to_datetime(history['Day']).dt.year.rename('year')
    historical = history.groupby(year_key)['Average surface temperature'].mean()

    # NOTE(review): assumes the last column of the predictions file is the
    # non-year column and every other column label is a year -- confirm.
    predicted = country_data.iloc[:, :-1].T
    predicted.columns = ['Predicted']

    # Create plot
    plt.figure(figsize=(10, 6))
    plt.plot(historical.index, historical.values, label='Historical', marker='o')
    plt.plot(predicted.index.astype(int), predicted['Predicted'], label='Predicted', marker='x')
    plt.title(f"Temperature Trends for {country}")
    plt.xlabel("Year")
    plt.ylabel("Temperature (°C)")
    plt.legend()
    plt.grid()
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Get specific prediction if target year provided
    if target_year:
        try:
            # Label-based lookup on both axes. The previous `[0]` relied on the
            # deprecated positional fallback of Series.__getitem__, whose
            # KeyError would be swallowed by the handler below.
            year_pred = predicted.loc[str(target_year), 'Predicted']
        except KeyError:
            return historical, predicted, "Prediction only available for 2022-2030"
        return historical, predicted, f"Predicted {target_year} temperature: {year_pred:.2f}°C"

    return historical, predicted, None

In [11]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt
from prophet import Prophet

# Read the surface-temperature dataset from disk
file_path2 = r"C:\Users\chris.mutuku\OneDrive - Skanem AS\Desktop\CAPSTONE PROJECT\Capstone-Project---Group-8\Data\Climate Change - datasets\Average Surface Temperature.csv"
df2 = pd.read_csv(file_path2)

# Parse the Day column, then derive calendar month and year from it
df2['Day'] = pd.to_datetime(df2['Day'])
df2 = df2.assign(month=df2['Day'].dt.month, year=df2['Day'].dt.year)

# Mean temperature per (Entity, year), as a flat three-column frame
yearly_avg = df2.groupby(['Entity', 'year'], as_index=False)['Average surface temperature'].mean()

# Alphabetical list of the distinct entities (countries)
entities = sorted(df2['Entity'].drop_duplicates().tolist())

# --- Input widgets ---------------------------------------------------------

# Multi-select list of countries; 'Kenya' is pre-selected
entity_select = widgets.SelectMultiple(
    description='Select Countries:',
    options=entities,
    value=['Kenya'],
    rows=10,
    disabled=False,
)

# Which of the three analyses the button should run
analysis_type = widgets.RadioButtons(
    description='Analysis Type:',
    options=['Yearly Trend', 'Monthly Trend', 'Forecast to 2060'],
    disabled=False,
)

# Inclusive year window for the trend plots, bounded by the years in the data
year_range_slider = widgets.IntRangeSlider(
    description='Year Range:',
    value=[1980, 2020],
    min=df2['year'].min(),
    max=df2['year'].max(),
    step=1,
    continuous_update=False,
)

# Last year of history passed to the forecast as training cut-off
forecast_start_slider = widgets.IntSlider(
    description='Forecast Start:',
    value=2020,
    min=1980,
    max=2050,
    step=1,
    disabled=False,
)

# Trigger button and the output area that receives plots and messages
plot_button = widgets.Button(description="Generate Plot", button_style='success')
output = widgets.Output()

# Function to plot yearly trend for multiple countries
def plot_yearly_trend(entities, year_range):
    """Plot the yearly average surface temperature for each selected entity.

    Parameters
    ----------
    entities : iterable of entity names looked up in ``yearly_avg['Entity']``.
    year_range : pair ``(first_year, last_year)``; both ends are inclusive.

    Prints a message instead of plotting when nothing is selected or when no
    selected entity has data inside the year range. Returns ``None``.
    """
    # Check the selection before creating a figure: an unused figure would
    # stay open and be rendered by the next plt.show() call.
    if not entities:
        print("Please select at least one country.")
        return

    first_year, last_year = year_range[0], year_range[1]
    fig, ax = plt.subplots(figsize=(14, 7))
    plotted_any = False

    for entity in entities:
        entity_data = yearly_avg[
            (yearly_avg['Entity'] == entity)
            & yearly_avg['year'].between(first_year, last_year)  # inclusive on both ends
        ]
        if entity_data.empty:
            continue
        ax.plot(entity_data['year'], entity_data['Average surface temperature'],
                marker='o', label=entity, linewidth=2)
        plotted_any = True

    if not plotted_any:
        # Nothing to draw: discard the empty figure rather than showing a
        # blank chart with an empty legend.
        plt.close(fig)
        print("No data available for the selected countries and year range.")
        return

    ax.set_title(f"Yearly Average Surface Temperature ({year_range[0]}-{year_range[1]})")
    ax.set_xlabel("Year")
    ax.set_ylabel("Temperature (°C)")
    ax.grid(True, linestyle='--', alpha=0.7)
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    plt.show()

# Function to plot monthly trend for multiple countries
def plot_monthly_trend(entities, year_range):
    """Plot the average temperature per calendar month for each selected entity.

    For every entity the rows of ``df2`` inside the inclusive ``year_range``
    are averaged by calendar month and drawn as one line over Jan..Dec.

    Parameters
    ----------
    entities : iterable of entity names looked up in ``df2['Entity']``.
    year_range : pair ``(first_year, last_year)``; both ends are inclusive.

    Prints a message instead of plotting when nothing is selected or when no
    selected entity has data inside the year range. Returns ``None``.
    """
    # Check the selection before creating a figure: an unused figure would
    # stay open and be rendered by the next plt.show() call.
    if not entities:
        print("Please select at least one country.")
        return

    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    first_year, last_year = year_range[0], year_range[1]

    fig, ax = plt.subplots(figsize=(14, 7))
    plotted_any = False

    for entity in entities:
        entity_data = df2[
            (df2['Entity'] == entity)
            & df2['year'].between(first_year, last_year)  # inclusive on both ends
        ]
        if entity_data.empty:
            continue

        # Align to all 12 months: a month with no rows becomes NaN (a gap in
        # the line) instead of making x and y different lengths.
        monthly_avg = (
            entity_data.groupby('month')['Average surface temperature']
            .mean()
            .reindex(range(1, 13))
        )
        ax.plot(months, monthly_avg.to_numpy(), marker='o', label=entity, linewidth=2)
        plotted_any = True

    if not plotted_any:
        plt.close(fig)
        print("No data available for the selected countries and year range.")
        return

    ax.set_title(f"Monthly Average Temperature ({year_range[0]}-{year_range[1]})")
    ax.set_xlabel("Month")
    ax.set_ylabel("Temperature (°C)")
    ax.grid(True, linestyle='--', alpha=0.7)
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    plt.show()

# Function to generate forecast for multiple countries
def generate_forecast(entities, forecast_start):
    """Fit a Prophet model per entity on yearly averages and forecast to 2060.

    Parameters
    ----------
    entities : iterable of entity names looked up in ``yearly_avg['Entity']``.
    forecast_start : last year (inclusive) of history used for training.

    For every entity with more than two training points the forecast (with
    its uncertainty band) and the full history are plotted, and a summary
    table with the observed 2023 value, the predicted 2060 value and their
    difference is displayed. Prints a message when no entity could be
    forecast. Returns ``None``.
    """
    target_year = 2060    # forecast horizon reported in the summary
    baseline_year = 2023  # observed year the horizon is compared against

    # Check the selection before creating a figure: an unused figure would
    # stay open and be rendered by the next plt.show() call.
    if not entities:
        print("No valid data available for forecasting the selected countries.")
        return

    fig, ax = plt.subplots(figsize=(14, 7))
    forecast_results = {}

    for entity in entities:
        entity_data = yearly_avg[yearly_avg['Entity'] == entity]
        if entity_data.empty:
            continue

        # Prophet expects a 'ds' timestamp column and a 'y' value column;
        # each year is stamped as 1 January of that year.
        prophet_data = pd.DataFrame({
            'ds': pd.to_datetime(entity_data['year'], format='%Y'),
            'y': entity_data['Average surface temperature'],
        })

        # Filter data up to forecast start year
        train_data = prophet_data[prophet_data['ds'].dt.year <= forecast_start]
        if len(train_data) <= 2:  # Need at least 3 points for forecasting
            continue

        # NOTE(review): with one observation per year the yearly seasonal
        # component cannot be learned from the data -- confirm it is wanted.
        model = Prophet(
            yearly_seasonality=True,
            changepoint_prior_scale=0.5,
            seasonality_prior_scale=10.0
        )
        model.fit(train_data)

        # Future dates are generated after the LAST TRAINING date, so the
        # horizon must be measured from there (not from forecast_start, which
        # may lie beyond the data). 'YS' keeps the 1-January stamps used in
        # training; the year-end alias 'Y' stopped at 31 Dec 2059 and never
        # produced a 2060 row.
        last_train_year = int(train_data['ds'].dt.year.max())
        periods = max(target_year - last_train_year, 0)
        future = model.make_future_dataframe(periods=periods, freq='YS')
        forecast = model.predict(future)

        # Plot forecast with its uncertainty band
        ax.plot(forecast['ds'], forecast['yhat'], label=f"{entity} (forecast)", linestyle='--')
        ax.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], alpha=0.2)

        # Plot historical data
        ax.plot(prophet_data['ds'], prophet_data['y'], label=f"{entity} (historical)", linewidth=2)

        # Store forecast results; NaN (not None) marks a missing value so the
        # subtraction and number formatting below keep working.
        baseline_rows = entity_data.loc[
            entity_data['year'] == baseline_year, 'Average surface temperature']
        target_rows = forecast.loc[forecast['ds'].dt.year == target_year, 'yhat']
        forecast_results[entity] = {
            '2023': float(baseline_rows.iloc[0]) if not baseline_rows.empty else float('nan'),
            '2060': float(target_rows.iloc[0]) if not target_rows.empty else float('nan'),
        }

    if not forecast_results:
        plt.close(fig)
        print("No valid data available for forecasting the selected countries.")
        return

    ax.set_title(f"Temperature Forecast to 2060 (Starting from {forecast_start})")
    ax.set_xlabel("Year")
    ax.set_ylabel("Temperature (°C)")
    ax.grid(True, linestyle='--', alpha=0.7)
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    plt.show()

    # Display forecast summary table
    print("\nForecast Summary (Temperature in °C):")
    summary_df = pd.DataFrame.from_dict(forecast_results, orient='index')
    summary_df['Change (2060-2023)'] = summary_df['2060'] - summary_df['2023']
    display(summary_df.style.format("{:.2f}", na_rep="n/a").background_gradient(cmap='coolwarm'))

# Button click handler
def on_plot_button_click(b):
    """Run the analysis chosen in the radio buttons and render it into ``output``.

    ``b`` is the clicked button instance passed by ipywidgets; it is not used.
    The previous contents of the output area are cleared first, then the
    current widget values are read and forwarded to the matching plot function.
    """
    with output:
        output.clear_output()

        # Snapshot the current widget state
        chosen_countries = list(entity_select.value)
        chosen_years = year_range_slider.value
        training_cutoff = forecast_start_slider.value
        mode = analysis_type.value

        if mode == 'Forecast to 2060':
            generate_forecast(chosen_countries, training_cutoff)
        elif mode == 'Monthly Trend':
            plot_monthly_trend(chosen_countries, chosen_years)
        elif mode == 'Yearly Trend':
            plot_yearly_trend(chosen_countries, chosen_years)

# Run the handler whenever the button is pressed
plot_button.on_click(on_plot_button_click)

# Two-tab layout: the input controls on the first tab, the rendered output
# area on the second
tab = widgets.Tab(children=[
    widgets.VBox([
        widgets.HBox([entity_select, analysis_type]),
        year_range_slider,
        forecast_start_slider,
        plot_button,
    ]),
    output,
])
tab.set_title(0, 'Controls')
tab.set_title(1, 'Results')

# Render the interface in the notebook
display(tab)

Tab(children=(VBox(children=(HBox(children=(SelectMultiple(description='Select Countries:', index=(96,), optio…