In [52]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [53]:
# Load the sea-level records from the local CSV into a DataFrame.
# Later cells group this frame by its 'Year' column and average 'GMSL_GIA'.
sea_level = pd.read_csv('datasets/sealevel.csv')

In [54]:
import plotly.express as px


# Reduce the raw records to one row per year holding the mean of 'GMSL_GIA'.
yearly_avg_gmsl = (
    sea_level
    .groupby('Year')['GMSL_GIA']
    .mean()
    .reset_index()
)

# Line chart of the yearly means; `labels` supplies the axis captions.
fig = px.line(
    yearly_avg_gmsl,
    x='Year',
    y='GMSL_GIA',
    title='Yearly Average Global Mean Sea Level (GMSL) Over the Years (Including GIA)',
    labels=dict(Year='Year', GMSL_GIA='Average Global Mean Sea Level (mm)'),
)

fig.show()


In [66]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Polynomial trend of the yearly mean sea level, extrapolated past the last
# observed year. Produces `predicted_df` (Year, Predicted_GMSL) and a scatter
# figure showing observed yearly means (blue) and extrapolated values (red).

# A low degree is used on purpose: a high-degree polynomial fitted to a few
# dozen yearly points follows the noise and diverges as soon as it is
# evaluated outside the observed years.
SEA_LEVEL_POLY_DEGREE = 2
SEA_LEVEL_FORECAST_END_YEAR = 2030  # last year (inclusive) to predict

# Load the sea-level records; read_csv already returns a DataFrame, so no
# extra wrapping is needed. `data` is kept as a name for backward compatibility.
data = pd.read_csv('datasets/sealevel.csv')
sea_level = data

# Calculate yearly averages
yearly_avg_gmsl = sea_level.groupby('Year')['GMSL_GIA'].mean().reset_index()

# Prepare data for the regression: one feature column (year), one target.
X = yearly_avg_gmsl['Year'].values.reshape(-1, 1)
y = yearly_avg_gmsl['GMSL_GIA'].values

# Standardise the year before expanding it into powers. Raw calendar years
# (~2000) raised to higher powers produce a severely ill-conditioned design
# matrix; centred/scaled years keep every power on a comparable magnitude.
year_center = X.mean()
year_scale = X.std() or 1.0  # fall back to 1.0 when all years are identical
X_scaled = (X - year_center) / year_scale

# Expand the standardised year into polynomial features and fit by least squares.
poly = PolynomialFeatures(degree=SEA_LEVEL_POLY_DEGREE)
X_poly = poly.fit_transform(X_scaled)

model = LinearRegression()
model.fit(X_poly, y)

# Years to forecast: from the year after the last observation up to the end year.
last_observed_year = int(X.max())
if last_observed_year >= SEA_LEVEL_FORECAST_END_YEAR:
    # Without this check the empty year range reaches scikit-learn and fails
    # with an unrelated-looking "0 sample(s)" error.
    raise ValueError(
        f'Observed data already reaches {last_observed_year}; '
        f'nothing to forecast up to {SEA_LEVEL_FORECAST_END_YEAR}.'
    )
years_extended = np.arange(last_observed_year + 1, SEA_LEVEL_FORECAST_END_YEAR + 1).reshape(-1, 1)

# The forecast years must go through the same standardisation as the training years.
years_extended_poly = poly.transform((years_extended - year_center) / year_scale)
predicted_gmsl = model.predict(years_extended_poly)

# Collect the predictions in a DataFrame
predicted_df = pd.DataFrame({'Year': years_extended.flatten(), 'Predicted_GMSL': predicted_gmsl})

# Plot observed yearly means and the extrapolated predictions as scatter points
fig = px.scatter(yearly_avg_gmsl, x='Year', y='GMSL_GIA',
                 title=('Yearly Average Global Mean Sea Level (GMSL) with Polynomial Regression '
                        f'Prediction to {SEA_LEVEL_FORECAST_END_YEAR}'),
                 labels={'Year': 'Year', 'GMSL_GIA': 'Average Global Mean Sea Level (mm)'},
                 opacity=0.7, color_discrete_sequence=['blue'])
# Name the trace so the legend does not fall back to a generic "trace 1".
fig.add_scatter(x=predicted_df['Year'], y=predicted_df['Predicted_GMSL'],
                mode='markers', name='Polynomial Regression Prediction',
                marker=dict(color='red'))

# Show plot
fig.show()

In [None]:
# Yearly mean land temperature from 1900 onward, plotted as a line chart.
# The frame is built in one non-mutating chain so that no column is ever
# assigned on a filtered slice (the usual trigger for SettingWithCopyWarning)
# and re-running the cell always starts again from the CSV.
# NOTE(review): the mean is taken over all rows of a year with equal weight
# (presumably one row per country per month — confirm), and a year that is
# only partially present in the file is averaged over fewer months.
global_temp = (
    pd.read_csv('datasets/GlobalLandTemperaturesByCountry.csv')
    .dropna(subset=['AverageTemperature'])                # drop rows without a measurement
    .assign(dt=lambda df: pd.to_datetime(df['dt']))       # compare real dates, not strings
    .loc[lambda df: df['dt'] >= pd.Timestamp('1900-01-01')]
    .assign(Year=lambda df: df['dt'].dt.year)
    .groupby('Year')['AverageTemperature']
    .mean()
    .reset_index()
)

fig = px.line(global_temp, x='Year', y='AverageTemperature',
              title='Yearly Average Global Land Temperature',
              labels={'Year': 'Year', 'AverageTemperature': 'Average Temperature (°C)'})
fig.show()


In [85]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Forecast the yearly mean land temperature with four regressors and plot the
# observed yearly means together with the polynomial forecast.
# Produces `predicted_df` with one prediction column per model.

TEMP_POLY_DEGREE = 3           # degree 1 would just reproduce the linear model (Model 1)
TEMP_FORECAST_END_YEAR = 2030  # last year (inclusive) to predict

# Load the dataset and reduce it to one mean temperature per year (1900 onward).
# Built as a single chain so no column is assigned on a filtered slice.
# NOTE(review): a year that is only partially present in the file is averaged
# over fewer months — confirm the last year is complete.
global_temp = (
    pd.read_csv('datasets/GlobalLandTemperaturesByCountry.csv')
    .dropna(subset=['AverageTemperature'])
    .assign(dt=lambda df: pd.to_datetime(df['dt']))
    .loc[lambda df: df['dt'] >= pd.Timestamp('1900-01-01')]
    .assign(Year=lambda df: df['dt'].dt.year)
    .groupby('Year')['AverageTemperature']
    .mean()
    .reset_index()
)

# Prepare data for prediction: one feature column (year), one target.
X = global_temp['Year'].values.reshape(-1, 1)
y = global_temp['AverageTemperature'].values

# Years to forecast: from the year after the last observation up to the end year.
last_observed_year = int(X.max())
if last_observed_year >= TEMP_FORECAST_END_YEAR:
    # Without this check the empty year range reaches the models and fails
    # with an unrelated-looking "0 sample(s)" error.
    raise ValueError(
        f'Observed data already reaches {last_observed_year}; '
        f'nothing to forecast up to {TEMP_FORECAST_END_YEAR}.'
    )
years_extended = np.arange(last_observed_year + 1, TEMP_FORECAST_END_YEAR + 1).reshape(-1, 1)

# Model 1: Linear Regression on the raw year
linear_model = LinearRegression()
linear_model.fit(X, y)
predicted_linear = linear_model.predict(years_extended)

# Model 2: Polynomial Regression (degree = TEMP_POLY_DEGREE)
# The year is standardised first: powers of raw calendar years (~2000) make
# the least-squares problem ill-conditioned. The forecast years go through
# the same centring/scaling as the training years.
year_center = X.mean()
year_scale = X.std() or 1.0  # fall back to 1.0 when all years are identical
poly = PolynomialFeatures(degree=TEMP_POLY_DEGREE)
X_poly = poly.fit_transform((X - year_center) / year_scale)
poly_model = LinearRegression()
poly_model.fit(X_poly, y)
years_extended_poly = poly.transform((years_extended - year_center) / year_scale)
predicted_poly = poly_model.predict(years_extended_poly)

# Model 3: Random Forest Regressor
# Tree ensembles predict piecewise-constant values, so for years beyond the
# training range these forecasts stay flat instead of following a trend.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X, y)
predicted_rf = rf_model.predict(years_extended)

# Model 4: XGBoost Regressor (same extrapolation caveat as the random forest)
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
xgb_model.fit(X, y)
predicted_xgb = xgb_model.predict(years_extended)

# Combine all predictions into a DataFrame
predicted_df = pd.DataFrame({
    'Year': years_extended.flatten(),
    'Linear_Predicted_Temperature': predicted_linear,
    'Poly_Predicted_Temperature': predicted_poly,
    'RF_Predicted_Temperature': predicted_rf,
    'XGB_Predicted_Temperature': predicted_xgb
})

# Plot the observed yearly means; the title reports the actual forecast span
# rather than a hard-coded range.
fig = px.scatter(global_temp, x='Year', y='AverageTemperature',
                 title=('Yearly Average Global Land Temperature with Predictions '
                        f'({last_observed_year + 1} - {TEMP_FORECAST_END_YEAR})'),
                 labels={'Year': 'Year', 'AverageTemperature': 'Average Temperature (°C)'})

# Only the polynomial forecast is drawn; the other models' predictions remain
# available in `predicted_df` for inspection.
fig.add_scatter(x=predicted_df['Year'], y=predicted_df['Poly_Predicted_Temperature'],
                mode='markers', name='Polynomial Regression Prediction', marker=dict(color='red'))

# Show the plot
fig.show()