## Solis Projectum => Data Analysis

In [None]:
from scipy import stats
from scipy.stats import linregress
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import solis

In [None]:
# Directories used by the notebook: cleaned input data and generated plots
clean_dir, plots_dir = "data/clean", "plots"

### Load the Clean Data
***

In [None]:
#### Sunspots
# File names of the cleaned sunspot data sets, keyed by sampling period
csv_file_sunspots = {
    "monthly": "sunspot_monthly.csv",
    "yearly": "sunspot_yearly.csv",
}

# Load each data set and use its "Year" column as the index
sunspot_monthly = solis.load_csv(csv_file_sunspots["monthly"]).set_index("Year")
sunspot_yearly = solis.load_csv(csv_file_sunspots["yearly"]).set_index("Year")

In [None]:
#### Temperature
# File names of the cleaned temperature data sets, keyed by statistic
csv_file_temp = {
    "avg": "temp_average.csv",
    "max": "temp_maximum.csv",
    "min": "temp_minimum.csv",
}

# Load the three data sets in avg / max / min order, each indexed by "Year"
temp_average, temp_maximum, temp_minimum = (
    solis.load_csv(csv_file_temp[statistic]).set_index("Year")
    for statistic in ("avg", "max", "min")
)

In [None]:
#### Cooling & Heating Days
# File names of the cleaned cooling/heating-day data sets, keyed by season
csv_file_heatcool = {
    "summer": "summer_cooling_days.csv",
    "winter": "winter_heating_days.csv",
}

# Loaded as-is: unlike the other data sets, no "Year" index is set here
summer_cooling_days, winter_heating_days = (
    solis.load_csv(csv_file_heatcool[season])
    for season in ("summer", "winter")
)

### Plots
***

# Space Weather

In [None]:
# Line plot of the yearly mean total sunspot number against the Year index
x_axis = sunspot_yearly.index
sunspot_yearly_mean_total = sunspot_yearly["Yearly Mean Total Sunspots"]

# Shared text styling for the title and the axis labels
title_font = {'fontsize': 24, 'fontweight': 'bold'}
label_font = {'fontsize': 14, 'fontweight': 'bold'}

plt.figure(figsize=(20, 10))
plt.title("Sunspots Yearly Total Average", fontdict=title_font)
plt.xlabel("Year", fontdict=label_font)
plt.ylabel("Volume", fontdict=label_font)
# One tick every 11 years, starting at 1895
plt.xticks(np.arange(1895, 2020, step=11))
plt.grid(True)

plt.plot(x_axis, sunspot_yearly_mean_total, color='red')
plt.show()

# Terra Firma Weather

In [None]:
# Line plot of the row-wise mean of temp_average against its Year index
x_axis = temp_average.index

# Shared text styling for the title and the axis labels
title_font = {'fontsize': 24, 'fontweight': 'bold'}
label_font = {'fontsize': 14, 'fontweight': 'bold'}

plt.figure(figsize=(20, 6))
plt.title("Average Yearly Temperature", fontdict=title_font)
plt.xlabel("Year", fontdict=label_font)
plt.ylabel("Degree", fontdict=label_font)
# One tick every 11 years, starting at 1895
plt.xticks(np.arange(1895, 2020, step=11))
plt.grid(True)

plt.plot(x_axis, temp_average.mean(axis=1), color='navy')
plt.show()

In [None]:
# Overlay the yearly mean temperature (left y-axis, line) and the yearly mean
# total sunspot number (right y-axis, bubbles) on a common Year axis.
fig = plt.figure(figsize = (20,10))
ax1 = fig.add_subplot(1, 1, 1)
ax2 = ax1.twinx()  # second y-axis that shares ax1's x-axis

ax1.set_title("Yearly Sunspots and Temperature Average",fontdict = {'fontsize': 24, 'fontweight': 'bold'})
ax1.set_xlabel("Year", fontdict = {'fontsize': 14, 'fontweight': 'bold'})
ax1.set_ylabel("Degree", fontdict = {'fontsize': 14, 'fontweight': 'bold'})
ax2.set_ylabel("Sunspot Activity", fontdict = {'fontsize': 14, 'fontweight': 'bold'}, color = "red")

# Recomputed here so this cell does not rely on the standalone sunspot plot
# cell having been executed first.
sunspot_yearly_mean_total = sunspot_yearly["Yearly Mean Total Sunspots"]
temp_average_mean = temp_average.mean(axis=1)

x_axis = sunspot_yearly.index

# Each series is drawn against its OWN Year index. Plotting the temperature
# against the sunspot index pairs the values purely by position, which
# mislabels the years (or raises on a length mismatch) whenever the two data
# sets do not cover exactly the same years.
ax1.plot(temp_average_mean.index, temp_average_mean, color='navy')
# Bubble area grows with the sunspot number; +15 keeps quiet years visible
ax2.scatter(x_axis, sunspot_yearly_mean_total, color='red', edgecolors='black', s=(sunspot_yearly_mean_total/2+15), alpha=.55)

ax1.set_xticks(np.arange(1895,2020,step=11))
plt.grid(True)
plt.show()

In [None]:
# Overlay the yearly mean of the maximum temperatures (left y-axis, line) and
# the yearly mean total sunspot number (right y-axis, bubbles) on a common
# Year axis.
fig = plt.figure(figsize = (20,10))
ax1 = fig.add_subplot(1, 1, 1)
ax2 = ax1.twinx()  # second y-axis that shares ax1's x-axis

ax1.set_title("Yearly Sunspots and Temperature Maximum Average",fontdict = {'fontsize': 24, 'fontweight': 'bold'})
ax1.set_xlabel("Year", fontdict = {'fontsize': 14, 'fontweight': 'bold'})
ax1.set_ylabel("Degree", fontdict = {'fontsize': 14, 'fontweight': 'bold'})
ax2.set_ylabel("Sunspot Activity", fontdict = {'fontsize': 14, 'fontweight': 'bold'}, color = "red")

# Recomputed here so this cell does not rely on the standalone sunspot plot
# cell having been executed first.
sunspot_yearly_mean_total = sunspot_yearly["Yearly Mean Total Sunspots"]
temp_maximum_mean = temp_maximum.mean(axis=1)

# Use the index of the series actually being drawn (temp_maximum, not
# temp_average) so the years always match the plotted values.
x_axis = temp_maximum_mean.index

ax1.plot(x_axis, temp_maximum_mean, color='navy')
# The sunspot series is drawn against its own Year index rather than the
# temperature index, so the two data sets are never paired by position.
# Bubble area grows with the sunspot number; +15 keeps quiet years visible
ax2.scatter(sunspot_yearly_mean_total.index, sunspot_yearly_mean_total, color='red', edgecolors='black', s=(sunspot_yearly_mean_total/2+15), alpha=.55)

ax1.set_xticks(np.arange(1895,2020,step=11))
plt.grid(True)
plt.show()

In [None]:
# Preview: first 15 values of the row-wise mean of temp_maximum (one per Year)
temp_maximum.mean(axis=1).head(15)

In [None]:
# Preview: first 35 column-wise means of temp_average (mean() defaults to
# averaging down each column).
# NOTE(review): every other cell uses mean(axis=1) (per-Year mean) — confirm
# that the per-column mean is what was intended here.
temp_average.mean().head(35)

In [None]:
# Display the full temp_average frame (indexed by Year)
temp_average

In [None]:
# Turn the "Year" index back into an ordinary column so it can be selected
# and grouped on in the following cells.
# NOTE: not idempotent — running this cell a second time resets the default
# integer index as well, which adds an extra "index" column.
sunspot_monthly = sunspot_monthly.reset_index()
# Disabled: would pass every "Month" value through solis.vert_month_num
#sunspot_monthly["Month"] = sunspot_monthly["Month"].apply(solis.vert_month_num)
sunspot_monthly.head()


In [None]:
# Keep only the three columns needed for the monthly regression work
regression_columns = ["Year", "Month", "Monthly Mean Total"]
sunspot_monthly_reg = sunspot_monthly.loc[:, regression_columns]
sunspot_monthly_reg.head()

In [None]:
# Inspect the regression frame: first rows, column dtypes and column labels.
# A notebook cell only echoes the value of its LAST expression, so the first
# two results are printed explicitly; as bare expressions they were computed
# and then silently discarded.
print(sunspot_monthly_reg.head())
print(sunspot_monthly_reg.dtypes)
sunspot_monthly_reg.columns

In [None]:
# Average "Monthly Mean Total" for every (Year, Month) pair; the result is
# indexed by a two-level (Year, Month) index.
monthly_groups = sunspot_monthly_reg.groupby(['Year', 'Month'])
sm = monthly_groups.mean()
sm.head(15)

In [None]:
# Pivot the "Month" index level (level 1) into columns: one row per Year and
# one column per (value column, Month) pair.
sm = sm.unstack("Month")
sm.head(15)

In [None]:
# Show the column labels of sm (inspected before the month level is renamed)
sm.columns

In [None]:
# Relabel column level 1 in place by passing each label through
# solis.vert_month_num (presumably month number -> month name; the later
# column selection expects labels such as 'Jan' — confirm against solis).
sm.rename(columns=solis.vert_month_num, level=1, inplace=True)

In [None]:
# Show the column labels of sm again to check the result of the rename
sm.columns

In [None]:
# Preview the first rows of sm
sm.head()

In [None]:
# Put the month columns into calendar order.
# After unstack(), sm has two-level columns: ("Monthly Mean Total", <month>).
# Indexing with a list of plain labels is resolved against the FIRST level
# only, so sm[['Jan', ...]] raises KeyError; each column is therefore
# addressed by its full (value column, month) tuple.
# NOTE(review): assumes solis.vert_month_num yields exactly these labels.
month_order = ['Jan', 'Feb', 'March', 'April', 'May', 'June', 'July', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']
sm_df = sm[[("Monthly Mean Total", month) for month in month_order]]

In [None]:
# Preview the first 35 rows of sm_df
sm_df.head(35)

In [None]:
# Preview: first 75 values of the row-wise mean of temp_average (one per Year)
temp_average.mean(axis=1).head(75)

In [None]:
# Preview the first 10 rows of temp_average
temp_average.head(10)

In [None]:
# Linear regression of the yearly mean sunspot number (y) on the yearly mean
# temperature (x), using the first 75 years that both data sets cover.
#
# linregress() pairs x and y purely by position and ignores the pandas index.
# Taking head(75) of each series independently therefore pairs values from
# different years whenever the two data sets do not start in the same year,
# so the series are first aligned on their Year index. Years with a missing
# value in either series are dropped, since a NaN would make every regression
# statistic NaN.
yearly_temperature = temp_average.mean(axis=1).rename("temperature")
yearly_sunspots = sm.mean(axis=1).rename("sunspots")

paired = pd.concat([yearly_temperature, yearly_sunspots], axis=1, join="inner")
paired = paired.dropna().head(75)

x = paired["temperature"]
y = paired["sunspots"]

gradient,intercept,r_value,p_value,std_err=stats.linregress(x,y)
r_sqrd = r_value**2  # coefficient of determination

print("Gradient and intercept: {},{}".format(gradient,intercept))
print("R-squared {}".format(r_sqrd))
print("p-value {}".format(p_value))
print("std err {}".format(std_err))


In [None]:
# Scatter plot of yearly mean sunspot number (y) against yearly mean
# temperature (x), with the least-squares regression line on top.
#
# The result fields are read by name instead of unpacking into `_`:
# assigning to `_` in a notebook shadows IPython's "last output" variable.
regression = stats.linregress(x, y)
slope, intercept = regression.slope, regression.intercept
fit = slope * x + intercept  # fitted y value for every observed x

# Plot data
fig, ax = plt.subplots()

fig.suptitle("Yearly Sunspots vs. Temperature Average", fontsize=16, fontweight="bold")
ax.set_xlabel("Average Yearly Temperature (Degree)")
ax.set_ylabel("Yearly Mean Sunspots")

ax.plot(x, y, linewidth=0, marker='o')  # observations, markers only
ax.plot(x, fit, 'b--')                  # regression line, dashed blue

plt.show()