In [None]:
# Necessary Imports
%matplotlib inline

import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as seabornInstance 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [None]:
# Load the min/max temperature CSV and preview its first five rows.
dataset = pd.read_csv(filepath_or_buffer='Data/temp_data-AllYears-Min-Max.csv')
dataset.head(n=5)

In [None]:
# X is the 'Min Avg Temp' column (feature) and y is the 'Max Avg Temp' column (target).
# Each is turned into an (n_rows, 1) column vector by adding a trailing axis.
X = dataset['Min Avg Temp'].values[:, np.newaxis]
y = dataset['Max Avg Temp'].values[:, np.newaxis]

In [None]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# random_state is fixed so the same split is produced on every run.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=0, test_size=0.2)

In [None]:
# Fit a linear regression model on the training split.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

In [None]:
# Report the fitted parameters: intercept first, then slope.
for label, fitted_value in (
    ("The y intercept is:  ", regressor.intercept_),
    ("The slope is:  ", regressor.coef_),
):
    print(label, fitted_value)

In [None]:
# Run the fitted model on the held-out test inputs.
y_pred = regressor.predict(X=X_test)

In [None]:
# Side-by-side table of actual and predicted test values.
# Error is predicted minus actual.
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df['Error'] = df['Predicted'] - df['Actual']
df

In [None]:
# Bar chart of the Actual / Predicted / Error columns for the first 23 rows of df.
df1 = df.head(23)
# Draw on an explicit Axes so the grid calls below target this chart unambiguously.
fig, ax = plt.subplots(figsize=(16, 10))
df1.plot(kind='bar', ax=ax)
# Line widths are passed as numbers rather than strings that need implicit conversion.
ax.grid(which='major', linestyle='-', linewidth=0.5, color='green')
# NOTE(review): minor ticks are presumably off by default, in which case this
# call draws nothing visible — confirm, and enable minor ticks if the dotted
# grid is actually wanted.
ax.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
plt.show()

In [None]:
# Test-split scatter (gray) with the model's predictions drawn as a red line,
# then written to disk as a PNG.
fig1, ax = plt.subplots()
ax.scatter(X_test, y_test, color='gray')
ax.plot(X_test, y_pred, color='red', linewidth=2)
ax.set_xlabel("Minimum Temperature (C)")
ax.set_ylabel("Maximum Temperature (C)")
ax.set_title("Scatter Plot of Test Data Minimum to Maximum Temperature \n")
fig1.savefig('images/MinMax_Temp_scatter_test.png', bbox_inches='tight')

In [None]:
# Error metrics of the predictions against the held-out targets.
# The mean squared error is computed once and reused for its square root.
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))

In [None]:
# Range of the feature X, which holds the 'Min Avg Temp' column.
x_min = X.min()
x_max = X.max()
# The label names the quantity actually stored in X (it is not a year).
print("Min/Max X values (Min Avg Temp)", x_min, x_max)

In [None]:
# Smallest and largest observed values of the target y.
y_min_actual, y_max_actual = y.min(), y.max()
print("Min/Max Y values (Temperature)", y_min_actual, y_max_actual)

In [None]:
# Evaluate the fitted line y = intercept + slope * x at both ends of the X range.
# The parameters are read from the fitted model rather than pasted in by hand
# (an earlier run printed intercept [8.25909715] and slope [[0.64427629]]), so the
# calculated values stay in sync with the data and the train/test split.
intercept = np.ravel(regressor.intercept_)[0]  # flatten so 1-D or 2-D fits both work
slope = np.ravel(regressor.coef_)[0]
y_min = intercept + slope * x_min
y_max = intercept + slope * x_max
print(f"Actual Smallest Max Value: {y_min_actual}")
print(f"Calculated Smallest Max Value: {y_min}")
print(f"Actual Largest Max Value: {y_max_actual}")
print(f"Calculated Largest Max Value: {y_max}")

In [None]:
# Ask the fitted model for its prediction at the smallest and the largest X value
# (one single-row predict call per value), then print them next to the actuals.
y_min_predicted, y_max_predicted = (
    regressor.predict([[x_value]]) for x_value in (x_min, x_max)
)
print(f"Actual Smallest Max Value: {y_min_actual}")
print(f"Predicted Smallest Max Value: {y_min_predicted}")
print(f"Actual Largest Max Value: {y_max_actual}")
print(f"Predicted Largest Max Value: {y_max_predicted}")

In [None]:
# Scatter of every (X, y) observation in blue, with a red line joining the
# calculated y values at the two X extremes; the figure is saved as a PNG.
fig2, ax = plt.subplots()
ax.scatter(X, y, c='blue')
ax.plot([x_min, x_max], [y_min, y_max], c='red')
plt.xticks(rotation=45)
ax.set_xlabel("Minimum Average Annual Temperature")
ax.set_ylabel("Maximum Average Annual Temperature")
ax.set_title('Minimum vs. Maximum Average Values (in Celsius)')
fig2.savefig('images/Min_vs_Max_Temp.png', bbox_inches='tight')

## Plugging in the Numbers:
### Average Global Temperature  =  slope  *  CO2  +  y-intercept
### Slope = 0.00327157
### Y-Intercept = 12.82624641
### Global Temp. 1.5 Degrees C Above the 1850 Average Global Temperature = 16.37
### CO2 = (16.37 - 12.82624641)/0.00327157
### CO2 = 1083.19662731
### Mean Squared Error: 0.010940654807335687