# Tempora - Climate Analysis 

Himesh Dulal (20), Avipsa Parajuli (37), Aayusha Shrestha (50), Pratistha Thapa (57), Abhinav Lamsal (68)

## <b>Summary </b>
This project applies numerical methods (interpolation, linear regression, polynomial fitting with extrapolation, and the Newton–Raphson method) to analyse and predict climate data from the provided CSV dataset.

## 1. Loading the CSV Data

In [19]:
import pandas as pd

def load_and_prepare_data(filepath):
    """Read a climate CSV and return its rows in chronological order.

    Parameters
    ----------
    filepath : str, path, or file-like
        Anything ``pd.read_csv`` accepts. The data must contain ``Year``
        and ``Month`` columns.

    Returns
    -------
    pandas.DataFrame
        The rows sorted by ``Year`` and then ``Month``, re-indexed 0..n-1.
    """
    chronological = pd.read_csv(filepath).sort_values(by=['Year', 'Month'])
    return chronological.reset_index(drop=True)

In [20]:
def linear_interpolate_series(series):
    """Fill interior gaps of ``series`` by linear interpolation over position.

    Each run of missing values that has a known value on both sides is
    replaced by points on the straight line joining those two neighbours,
    using the element position (0, 1, 2, ...) as the x coordinate. Missing
    values at the very start or end have only one neighbour and are left
    untouched.

    All element access is positional, so the function works for a pandas
    Series with any index (for example after filtering or re-indexing),
    not only the default 0..n-1 index.

    Parameters
    ----------
    series : pandas.Series (or another indexable sequence with ``.copy()``)
        Values to interpolate; missing entries are those for which
        ``pd.isna`` is true.

    Returns
    -------
    Same type as ``series``
        A copy with interior gaps filled. The input is not modified.
    """
    result = series.copy()
    is_pandas = isinstance(series, pd.Series)
    # Read from a positional view so integer offsets never hit the label index.
    values = series.to_numpy() if is_pandas else series
    missing = [bool(pd.isna(v)) for v in values]
    n = len(missing)

    prev_index = -1  # position of the most recent known value, -1 if none yet
    i = 0
    while i < n:
        if not missing[i]:
            prev_index = i
            i += 1
            continue

        # i starts a gap: locate the first known value after it.
        next_index = i + 1
        while next_index < n and missing[next_index]:
            next_index += 1

        if prev_index >= 0 and next_index < n:
            x0, y0 = prev_index, values[prev_index]
            x1, y1 = next_index, values[next_index]
            # Fill the whole gap in one go instead of re-scanning per element.
            for x in range(i, next_index):
                filled = y0 + (x - x0) * (y1 - y0) / (x1 - x0)
                if is_pandas:
                    result.iloc[x] = filled
                else:
                    result[x] = filled

        # Resume at the known value (or at n when the gap runs to the end).
        i = next_index

    return result

In [21]:
def clean_data(df):
    """Interpolate missing temperature/rainfall values and plot the effect.

    Prints the rows where ``Temp_2m`` or ``Precip`` is missing, fills both
    columns with ``linear_interpolate_series`` on a copy of ``df``, prints
    the cleaned columns, and draws a side-by-side before/after figure.

    Requires ``plt`` (presumably ``matplotlib.pyplot``) to be imported in
    the notebook before this function is called.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Year``, ``Month``, ``Temp_2m`` and ``Precip``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the two measurement columns interpolated.
    """
    shown_columns = ['Year', 'Month', 'Temp_2m', 'Precip']

    print("Data with Missing Values:")
    has_gap = df['Temp_2m'].isna() | df['Precip'].isna()
    print(df.loc[has_gap, shown_columns])

    cleaned_df = df.copy()
    for column in ('Temp_2m', 'Precip'):
        cleaned_df[column] = linear_interpolate_series(cleaned_df[column])

    print("\n After Cleaning:")
    print(cleaned_df[shown_columns])

    # One panel per measurement: (column, raw label, filled label, title).
    panels = (
        ('Temp_2m', 'Original Temp', 'Cleaned Temp',
         'Temperature - Before vs After Interpolation'),
        ('Precip', 'Original Rain', 'Cleaned Rain',
         'Rainfall - Before vs After Interpolation'),
    )

    plt.figure(figsize=(12, 5))
    for position, (column, raw_label, filled_label, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(df[column], label=raw_label)
        plt.plot(cleaned_df[column], label=filled_label, linestyle='--')
        plt.title(title)
        plt.legend()

    plt.tight_layout()
    plt.show()

    return cleaned_df

In [22]:
# NOTE(review): `df` is assigned by the data-loading cell that appears further
# down in this notebook; that cell has to run before this one.
cleaned_df = clean_data(df)

Data with Missing Values:
     Year  Month    Temp_2m  Precip
2    2000      3  14.473226     NaN
6    2000      7        NaN  139.33
9    2000     10        NaN     NaN
15   2001      4        NaN    6.09
24   2002      1        NaN   12.10
30   2002      7        NaN     NaN
55   2004      8        NaN     NaN
58   2004     11  11.911667     NaN
73   2006      2  13.719286     NaN
77   2006      6  21.040000     NaN
80   2006      9  19.235000     NaN
88   2007      5  21.738710     NaN
91   2007      8  20.660323     NaN
93   2007     10  16.804839     NaN
112  2009      6        NaN  107.11
113  2009      7        NaN  219.02
115  2009      9        NaN   54.11
120  2010      2  11.110714     NaN
125  2010      7  21.698387     NaN
126  2010      8        NaN  341.30
129  2010     11  13.663333     NaN
137  2011      7        NaN  287.51
156  2013      3  15.580000     NaN
170  2014      6        NaN  235.00
172  2014      8  20.816774     NaN
182  2015      6        NaN  170.67
19

NameError: name 'plt' is not defined

In [23]:
def visualize_trends(df):
    """Plot yearly mean temperature and yearly total rainfall on one chart.

    The frame is grouped by ``Year``; ``Temp_2m`` is averaged and ``Precip``
    is summed. Both yearly series are drawn on the same axes.

    Requires ``plt`` (presumably ``matplotlib.pyplot``) to be imported in
    the notebook before this function is called.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Year``, ``Temp_2m`` and ``Precip``.
    """
    aggregations = {'Temp_2m': 'mean', 'Precip': 'sum'}
    yearly_df = df.groupby('Year').agg(aggregations).reset_index()

    # (column, line colour, marker, legend label) for each plotted series.
    line_specs = (
        ('Temp_2m', 'orange', 'o', 'Avg Temp (°C)'),
        ('Precip', 'blue', 's', 'Total Rainfall (mm)'),
    )

    plt.figure(figsize=(10, 5))

    for column, colour, marker, label in line_specs:
        plt.plot(yearly_df['Year'], yearly_df[column], color=colour, marker=marker, label=label)

    plt.title('Climate Trends in Kathmandu (2000–2019)')
    plt.xlabel('Year')
    plt.ylabel('Value')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()


In [24]:
# Plot yearly mean temperature and total rainfall for the interpolated data.
visualize_trends(cleaned_df)

NameError: name 'cleaned_df' is not defined

In [25]:
def compare_trends_by_year(df_original, df_cleaned):
    """Plot yearly aggregates of the original data against the cleaned data.

    Both frames are grouped by ``Year`` (mean of ``Temp_2m``, sum of
    ``Precip``). The left panel compares the yearly mean temperatures and
    the right panel compares the yearly rainfall totals.

    Requires ``plt`` (presumably ``matplotlib.pyplot``) to be imported in
    the notebook before this function is called.

    Parameters
    ----------
    df_original : pandas.DataFrame
        Data before interpolation; needs ``Year``, ``Temp_2m``, ``Precip``.
    df_cleaned : pandas.DataFrame
        Data after interpolation, with the same columns.
    """
    def aggregate_by_year(frame):
        # Same aggregation for both inputs: mean temperature, total rainfall.
        return frame.groupby('Year').agg({'Temp_2m': 'mean', 'Precip': 'sum'}).reset_index()

    yearly_orig = aggregate_by_year(df_original)
    yearly_cleaned = aggregate_by_year(df_cleaned)

    # Per-panel settings: column, line styles for each frame, title, y label.
    panels = (
        {
            'column': 'Temp_2m',
            'orig': {'label': 'Original Avg Temp', 'color': 'orange', 'marker': 'x'},
            'cleaned': {'label': 'Interpolated Avg Temp', 'color': 'green',
                        'linestyle': '--', 'marker': 'o'},
            'title': 'Yearly Avg Temperature (2000–2019)',
            'ylabel': 'Temperature (°C)',
        },
        {
            'column': 'Precip',
            'orig': {'label': 'Original Rainfall', 'color': 'blue', 'marker': 'x'},
            'cleaned': {'label': 'Interpolated Rainfall', 'color': 'purple',
                        'linestyle': '--', 'marker': 'o'},
            'title': 'Yearly Rainfall Totals (2000–2019)',
            'ylabel': 'Rainfall (mm)',
        },
    )

    plt.figure(figsize=(12, 6))

    for position, panel in enumerate(panels, start=1):
        column = panel['column']
        plt.subplot(1, 2, position)
        plt.plot(yearly_orig['Year'], yearly_orig[column], **panel['orig'])
        plt.plot(yearly_cleaned['Year'], yearly_cleaned[column], **panel['cleaned'])
        plt.title(panel['title'])
        plt.xlabel('Year')
        plt.ylabel(panel['ylabel'])
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.show()


In [26]:
# Compare yearly aggregates of the loaded data with the interpolated data.
compare_trends_by_year(df, cleaned_df)

NameError: name 'cleaned_df' is not defined

In [27]:
# District to analyse; its CSV is read from ./data/final_data/<district>.csv.
district="Kathmandu"
filepath=f"./data/final_data/{district}.csv"

# Read the CSV and order the rows by Year, then Month.
df = load_and_prepare_data(filepath)