Importing Modules

In [1]:
import pandas as pd
import numpy as np
import scipy
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import datetime

import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import colorlover as cl
from plotly.subplots import make_subplots
from scipy.stats import ttest_ind, pearsonr, linregress
from scipy.optimize import curve_fit
import kaleido
import os as os

from sklearn.metrics import r2_score 

from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

import math as math

In [2]:
def LinRegress(X, Y):
    """Fit an ordinary-least-squares line ``y ~ x`` and return the fitted model.

    X supplies the predictor values and Y the response values. Both are placed
    in a two-column frame (columns ``x`` and ``y``) that the formula is
    evaluated against. The value returned is whatever ``ols(...).fit()`` yields.
    """
    frame = pd.DataFrame({'x': X, 'y': Y})
    return ols("y ~ x", frame).fit()

def exponenial_func(x, a, b, c):
    """Evaluate ``a * exp(-b * x) + c`` at ``x`` (scalar or array-like)."""
    decay = np.exp(-b * x)
    return a * decay + c

def linear_func(x, b, m):
    """Evaluate the straight line with intercept ``b`` and slope ``m`` at ``x``.

    Note the argument order: intercept first, then slope.
    """
    scaled = m * x
    return scaled + b

In [3]:
####     COLOR VALUES     ####

# Dark red: passed as the colour of the observed-data traces in the figures below.
red = 'rgb(128, 0, 0)'
# Light red: colour of the pre-2000 regression line.
light_red = 'rgb(255, 153, 153)'

# Light blue: colour of the post-2000 regression line and of the later fitted curves.
light_blue = 'rgb(135,206,235)'

Reading data

In [4]:
# Load the house-price series from the project-relative CSV folder.
mspus_csv = 'CSV/Mean House Prices/MSPUS.csv'
df = pd.read_csv(mspus_csv)

# Untouched snapshot of the freshly loaded frame, kept as a fallback.
copy = df.copy()

# Per-column count of missing values (rendered as the cell output).
df.isnull().sum()

DATE     0
MSPUS    0
dtype: int64

In [5]:
# Preview the first rows of the loaded frame (columns DATE and MSPUS).
df.head()

Unnamed: 0,DATE,MSPUS
0,1963-01-01,17800.0
1,1963-04-01,18000.0
2,1963-07-01,17900.0
3,1963-10-01,18500.0
4,1964-01-01,18500.0


Section 1: House prices over time

In [11]:
# Remove every row containing a missing value. Rebinding the name (instead of
# inplace=True) keeps the cell idempotent and avoids mutating a frame that
# earlier cells have already displayed.
df = df.dropna(axis=0)

# Year-only view of the series: derive the calendar year from DATE, then drop
# DATE itself. `columns=` alone identifies the axis, so no `axis=1` is needed.
df1 = (
    df.assign(year=pd.to_datetime(df['DATE']).dt.year)
      .drop(columns=['DATE'])
)

# Optional yearly average, left disabled by the author:
# year_avg = df1.groupby('year')[['MSPUS']].mean().reset_index()

df1.tail()

Unnamed: 0,MSPUS,year
241,418500.0,2023
242,435400.0,2023
243,423200.0,2023
244,426800.0,2024
245,412300.0,2024


In [15]:
# Regression input: the price frame plus `item_id`, a copy of the row index
# that serves as the numeric x-variable for the trend fits.
reg_df = df.assign(item_id=df.index)

reg_df

Unnamed: 0,DATE,MSPUS,item_id
0,1963-01-01,17800.0,0
1,1963-04-01,18000.0,1
2,1963-07-01,17900.0,2
3,1963-10-01,18500.0,3
4,1964-01-01,18500.0,4
...,...,...,...
241,2023-04-01,418500.0,241
242,2023-07-01,435400.0,242
243,2023-10-01,423200.0,243
244,2024-01-01,426800.0,244


In [49]:
# Date at which the series is split into two regimes that are fitted separately.
SPLIT_DATE = '2000-01-01'


def _add_trend_trace(fig, segment, name, color):
    """Fit MSPUS ~ item_id on `segment` and add the fitted line to `fig`.

    `segment` is a slice of `reg_df` (columns DATE, MSPUS, item_id). The line is
    drawn dashed against the segment's DATE values. Returns the fitted model so
    the caller can inspect it.
    """
    fit = LinRegress(segment['item_id'], segment['MSPUS'])
    # params are ordered (Intercept, slope), which matches linear_func(x, b, m).
    fitted = linear_func(segment['item_id'], *fit.params)
    fig.add_trace(go.Scatter(x=segment['DATE'], y=fitted, mode='lines', name=name,
                             line=dict(dash='dash'), marker_color=color))
    return fit


fig = go.Figure()
fig.update_layout(title="Temporal House Price Over Quarters: 1963 - 2024", title_font_size = 20,
                  font=dict( family="Courier New, monospace", size=12,color="#7f7f7f"),
                  template = "ggplot2", hovermode= 'closest')
fig.update_xaxes(showline=True, linewidth=1, linecolor='gray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='gray')

fig.add_trace(go.Scatter(x = df['DATE'], y = df['MSPUS'], mode = 'lines',
                         name = 'House Price', marker_color=red))

####     REGRESSIONS     ####

# Split on the date rather than on hard-coded row numbers. The previous
# between(0,148) / between(148,245) masks were inclusive on both ends, so the
# 2000-01-01 observation was fitted in BOTH the "pre" and the "post" segment.
is_pre_split = pd.to_datetime(reg_df['DATE']) < pd.Timestamp(SPLIT_DATE)

pre_2000_model = _add_trend_trace(fig, reg_df[is_pre_split],
                                  'Pre-2000 Regression', light_red)
post_2000_model = _add_trend_trace(fig, reg_df[~is_pre_split],
                                   'Post-2000 Regression', light_blue)

####     SPLIT LINE     ####

# Vertical marker at the split date; 450000 is a fixed upper bound for the line.
fig.add_trace(go.Scatter( x=[SPLIT_DATE, SPLIT_DATE], y=[0, 450000], mode="lines",line=go.scatter.Line(color="gray"), showlegend=False))

fig

In [9]:
# Linear regression of Temp on Year for the rows of `avg_abs` with Year in 1975-2023.
# NOTE(review): `avg_abs` is not defined in any visible cell; the recorded output of
# this cell is a NameError. The cell that builds it must be restored (or this cell
# removed) before the notebook can run top to bottom.

X = pd.DataFrame(avg_abs[avg_abs['Year'].between(1975,2023)][['Year']])
y = pd.DataFrame(avg_abs[avg_abs['Year'].between(1975,2023)][['Temp']])

model = LinRegress(X['Year'], y['Temp'])

# params are unpacked as (intercept, slope); element 1 of tvalues is read as the
# t-statistic reported below.
b,m = model._results.params
r = model.rsquared
t = model._results.tvalues[1]

print(f"y = {m}*x+{b}")
print(f"R² = {r}")
print(f"t = {t}")


NameError: name 'avg_abs' is not defined

In [None]:
# Horizontal box plot of Temp grouped by the `turnpoint` column of `avg_abs`.
# NOTE(review): `avg_abs` is not defined in any visible cell, so this cell cannot run as is.
# NOTE(review): four subplot titles and one inset are declared for a 1x1 grid that
# receives a single trace -- confirm which titles are meant to be shown.
fig = make_subplots(rows=1, cols=1, insets=[{'cell': (1,1), 'l': 0.7, 'b': 0.3}], x_title = 'Temperature (°C)', subplot_titles=('Land Avg. Temperature vs. Time',  'Land Min. Temperature vs. Time', 'Land Max. Temperature vs. Time', 'Land & Ocean Avg. Temperature vs. Time'))
fig.update_layout(title="Average Temperatures Pre and Post 1975",font=dict( family="Courier New, monospace", size=15,color="#000000"),
                  template = "ggplot2", title_font_size = 20, hovermode= 'closest')

# Every individual point is drawn beside its box (boxpoints='all') and the mean is marked.
fig.add_trace(go.Box(x = avg_abs['Temp'], y = avg_abs['turnpoint'], boxpoints = 'all',jitter = 0.3,
                     pointpos = -1.6, marker_color = red, boxmean = True, showlegend=False),
              row = 1, col = 1)

# Applied to all traces in the figure: draw the boxes horizontally.
fig.update_traces(orientation='h')
#pio.write_image(fig, "images/fig2.png", width=5*200, height=2*200, scale=.75)
fig

t-test

In [None]:
# Load the CO2 table, discard rows with any missing value, and remove the
# 'Decimal Date' column, all in one non-mutating chain.
dfc = (
    pd.read_csv('CSV/CO2 Concentration/CO2 Concentration.csv')
    .dropna(axis=0)
    .drop(columns='Decimal Date')
)

# Print the column / dtype / non-null summary of the cleaned table.
dfc.info()

Carbon data setup

In [None]:
# Mean CO2 concentration per Year, with Year restored as an ordinary column.
carbon_year = dfc.groupby(by = 'Year')[['Carbon Dioxide (ppm)']].mean().reset_index()
# NOTE(review): unlike carbon_year, no aggregation is applied here, so this name holds a
# GroupBy object rather than a table. No later visible cell uses it; presumably
# `.mean().reset_index()` was intended -- confirm before relying on it.
carbon_month = dfc.groupby(by = ['Year', 'Month'])[['Carbon Dioxide (ppm)']]


In [None]:
# Fit a*exp(-b*x)+c to the yearly CO2 means. A starting guess and a raised
# function-evaluation cap are handed to the optimiser.
popt, pcov = curve_fit(
    exponenial_func,
    xdata=carbon_year['Year'],
    ydata=carbon_year['Carbon Dioxide (ppm)'],
    p0=(1, 1e-6, 1),
    maxfev=10000,
)

In [None]:
# Evaluate the fitted exponential at every observed year.
yy = exponenial_func(carbon_year['Year'], *popt)

a, b, c = popt
# exponenial_func is parameterised as exp(-b*x); flip the sign so the rate is
# reported for the exp(b*x) form.
b *= -1
# The exponent is parenthesised so the printed formula reads a*e^(b*x) + c.
# Without the parentheses it reads as (a*e^b)*x + c, which is a different model.
print(f"y = {a}*e^({b}*x) + {c}")

# Goodness of fit of the exponential curve against the observed yearly means.
rSquared = r2_score(carbon_year['Carbon Dioxide (ppm)'], yy)
print(f"R² = {rSquared}")

In [None]:
# Ordinary-least-squares fit of yearly CO2 (y) against year (x).
X = carbon_year['Year']
y = carbon_year['Carbon Dioxide (ppm)']

data = pd.DataFrame({'x': X, 'y': y})
model = ols("y ~ x", data).fit()
print(model.summary())

# Fitted straight line at every observed year; params are (intercept, slope).
intercept, slope = model._results.params
yyy = linear_func(X, intercept, slope)

In [None]:
# Styling shared by both axes, and a legend anchored inside the top-left corner.
axis_frame = dict(showline=True, linewidth=1, linecolor='gray')
legend_top_left = dict(yanchor="top", y=0.99, xanchor="left", x=0.01)
years = carbon_year['Year']

fig = go.Figure()
fig.update_layout(
    title="Carbon Dioxide PPM per Year (1958 - 2017)",
    title_font_size=20,
    font=dict(family="Courier New, monospace", size=15, color="#000000"),
    template="ggplot2",
    hovermode='closest',
    legend=legend_top_left,
)
fig.update_xaxes(title='Time (Year)', **axis_frame)
fig.update_yaxes(title='Carbon Dioxide (PPM)', **axis_frame)

# Fitted curve (`yy`) first, then the observed yearly means as unlabelled markers.
fig.add_trace(go.Scatter(x=years, y=yy, mode='lines',
                         name='Exponential Regression Line', marker_color=light_blue))
fig.add_trace(go.Scatter(x=years, y=carbon_year['Carbon Dioxide (ppm)'], mode='markers',
                         showlegend=False, marker_color=red))

#pio.write_image(fig, "images/fig3.png", width=5*200, height=2*200, scale=.75)
fig

In [None]:
# Regress temperature (y) on CO2 concentration (x) over the years 1958-2017.
# Both selections are re-indexed from zero so the two columns pair up by position.
co2_window = carbon_year['Year'].between(1958, 2017)
X = carbon_year.loc[co2_window, ['Carbon Dioxide (ppm)']].reset_index()

temp_window = avg_abs['Year'].between(1958, 2017)
Y = avg_abs.loc[temp_window, ['Temp']].reset_index()

data = pd.DataFrame({'x': X['Carbon Dioxide (ppm)'], 'y': Y['Temp']})
model = ols("y ~ x", data).fit()
print(model.summary())

In [None]:
# Evaluate the fitted line on the same CO2 values that were used for the fit and
# that the next figure plots on its x-axis (X). Using the unfiltered carbon_year
# column would give a line of a different length whenever carbon_year holds
# years outside the 1958-2017 window.
yy = linear_func(X['Carbon Dioxide (ppm)'], *model._results.params)

In [None]:
# Styling shared by both axes, and a legend anchored inside the top-left corner.
axis_frame = dict(showline=True, linewidth=1, linecolor='gray')
legend_top_left = dict(yanchor="top", y=0.99, xanchor="left", x=0.01)
co2_values = X['Carbon Dioxide (ppm)']

fig = go.Figure()
fig.update_layout(
    title="Global Land Avg. Temp vs. Carbon Dioxide <br> PPM per Year (1958 - 2017)",
    title_font_size=30,
    font=dict(family="Courier New, monospace", size=25, color="#000000"),
    template="ggplot2",
    hovermode='closest',
    legend=legend_top_left,
)
fig.update_xaxes(title='Carbon Dioxide (PPM)', **axis_frame)
fig.update_yaxes(title='Land Average Temperature (°C)', **axis_frame)

# Fitted line (`yy`) first, then the observed pairs as unlabelled markers.
fig.add_trace(go.Scatter(x=co2_values, y=yy, mode='lines',
                         name='Linear Regression Line', marker_color=light_blue))
fig.add_trace(go.Scatter(x=co2_values, y=Y['Temp'], mode='markers',
                         showlegend=False, marker_color=red))

#pio.write_image(fig, "images/fig4.png", width=5*200, height=2*200, scale=.75)
#pio.write_image(fig, "images/fig4poster.png", width=8.2*100, height=6.5*100, scale=.5)

fig

In [None]:
# Two-sample t-test, without the equal-variance assumption (equal_var=False), between
# the yearly CO2 means and the temperature series.
# NOTE(review): the two samples are different quantities ('Carbon Dioxide (ppm)' vs
# 'Temp'), so a difference-in-means test may not be the intended analysis -- confirm.
# NOTE(review): `avg_abs` is not defined in any visible cell.
ttest_ind(carbon_year['Carbon Dioxide (ppm)'], avg_abs['Temp'], equal_var=False)

In [None]:
# Restrict both series to the years 1958-2015 and expose the measured value of
# each under a common column name, `data`.
carboncorr = (
    carbon_year.loc[carbon_year['Year'].between(1958, 2015)]
    .rename(columns={'Carbon Dioxide (ppm)': 'data'})
)
tempcorr = (
    avg_abs.loc[avg_abs['Year'].between(1958, 2015), ['Year', 'Temp']]
    .rename(columns={'Temp': 'data'})
)

# Pearson correlation between temperature and CO2 (rendered as the cell output).
pearsonr(tempcorr['data'], carboncorr['data'])

In [None]:
# Projection horizon: every calendar year from 2017 through 2050 inclusive.
data = {"Years": list(range(2017, 2051))}

Model

CO2 vs. time: $y = 6.14506622218523 \times 10^{-13}\, e^{0.016418987055116834\,x} + 257.6808192876246$

Temp vs. CO2: $y = 0.0112\,x + 10.4692$


In [None]:
def model(x):
    """Return the modelled temperature for year ``x``.

    Two hard-coded fits are composed: an exponential in ``x`` gives a CO2
    value, and a straight line maps that CO2 value to a temperature.
    """
    co2_ppm = 6.14506622218523 * 10 ** -13 * math.e ** (0.016418987055116834 * x) + 257.6808192876246
    return 0.0112 * co2_ppm + 10.4692

# Tabulate the projection years and evaluate the composed model on the whole
# "Years" column at once; `aa` holds one modelled value per year.
datadf = pd.DataFrame(data)
aa = model(datadf["Years"])

In [None]:
# Historic series for the final figure: Temp for the years 2000-2016 and Year for 2000-2017.
# NOTE(review): the two windows differ by one year; a later cell adds one more row to Y,
# presumably to make the lengths match -- confirm.
# NOTE(review): `avg_abs` is not defined in any visible cell.
Y = avg_abs[avg_abs['Year'].between(2000,2016)][['Temp']].reset_index()
X = avg_abs[avg_abs['Year'].between(2000,2017)][['Year']].reset_index()

In [None]:
# Show the first ten modelled values.
aa.head(10)

In [None]:
# Add one row to Y holding the value 15.016024 in its 'Temp' position.
# NOTE(review): the row label is len(df.index), and `df` is the house-price frame, not Y.
# This only works because assigning to a new .loc label enlarges the frame; len(Y) was
# presumably intended.
# NOTE(review): after reset_index() the first column of Y is presumably the old index, not a
# year, so 2017 is written into that column -- confirm this is harmless for the plot.
# NOTE(review): 15.016024 looks like the hard-coded model value for 2017 -- confirm.
Y.loc[len(df.index)] = [2017, 15.016024]

In [None]:
# Styling shared by both axes, and a legend anchored inside the top-left corner.
axis_frame = dict(showline=True, linewidth=1, linecolor='gray')
legend_top_left = dict(yanchor="top", y=0.99, xanchor="left", x=0.01)

fig = go.Figure()
fig.update_layout(
    title="Future Global Land Avg. Temperature Trends: 2000-2050",
    title_font_size=20,
    font=dict(family="Courier New, monospace", size=15, color="#000000"),
    template="ggplot2",
    hovermode='closest',
    legend=legend_top_left,
)
fig.update_xaxes(title='Year', **axis_frame)
fig.update_yaxes(title='Temperature', **axis_frame)

# Solid line: historic values. Dashed line in the same colour: model output.
historic_trace = go.Scatter(x=X['Year'], y=Y['Temp'], mode='lines',
                            name='Historic Temperature Data', marker_color=red)
predicted_trace = go.Scatter(x=datadf["Years"], y=aa, name='Model Predicted Data',
                             line=dict(color=red, dash='dash'))
fig.add_trace(historic_trace)
fig.add_trace(predicted_trace)

#pio.write_image(fig, "images/fig10.png", width=5*200, height=2*200, scale=.75)
# fig.update_layout(title="Future Global Land Avg. Temperature <br>Trends: 2000-2035", title_font_size = 30,
#                   font=dict( family="Courier New, monospace", size=25,color="#000000"),
#                   template = "ggplot2", hovermode= 'closest', legend=dict(
#     yanchor="top",
#     y=0.99,
#     xanchor="left",
#     x=0.01
# ))
#pio.write_image(fig, "images/fig10poster.png", width=8.2*100, height=6.5*100, scale=.5)
fig