# <center> Data Visualization with Matplotlib

<center> 4.6.2020

<center> Mónika Farsang

### Importing libraries

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline

# Path of the CSV file that feeds every plot in this notebook
file = "Monika_Farsang_dataset.csv"

# Read the file into a DataFrame and echo it as the cell output
df = pd.read_csv(filepath_or_buffer=file)
df

### Histogram

In [None]:
# Cumulative histogram (10 bins) of the daily social interactions;
# the return value of plt.hist is kept in y1
y1 = plt.hist(
    df["Number of social interactions per day"],
    bins=10,
    cumulative=True,
    facecolor="lightblue",
    edgecolor="blue",
)
plt.show()

The histogram above shows the cumulative distribution of the number of social interactions per day.

In [None]:
# Histogram (10 bins) of the number of close friends, restricted to
# the value range 1..10; the return value of plt.hist is kept in y2
y2 = plt.hist(
    df["Number of close friends"],
    bins=10,
    range=(1, 10),
    facecolor="lightgreen",
    edgecolor="green",
)
plt.show()

This histogram gives us information about the number of close friends in the range from 1 to 10.

In [None]:
# First layer: cumulative histogram of the daily social interactions
plt.hist(
    df["Number of social interactions per day"],
    bins=10,
    cumulative=True,
    facecolor="lightblue",
    edgecolor="blue",
)

# Second layer: close friends in the range 1..10, drawn half-transparent
# so the first layer stays visible underneath
plt.hist(
    df["Number of close friends"],
    bins=10,
    range=(1, 10),
    alpha=0.5,
    facecolor="lightgreen",
    edgecolor="green",
)
plt.show()

The combined plot above displays the cumulative number of social interactions per day together with the number of close friends in the range from 1 to 10.

### Pie plot

In [None]:
from scipy import stats

# Frequency of individual sleeping hours values.
# scipy.stats.itemfreq was removed in SciPy 1.3, so the sorted unique values
# and their counts are computed with np.unique instead. Missing answers are
# dropped first so that NaN never shows up as a "value" of its own.
hours, counts = np.unique(df["Sleeping hours"].dropna(), return_counts=True)

# Same layout itemfreq produced: one row per value, columns [value, count]
freq_hours = np.column_stack((hours, counts))

# Transform to DataFrame
df1 = pd.DataFrame(freq_hours, columns=['Sleeping hours', 'Frequency'])

# Only the eight smallest sleeping-hours values are kept for the pie chart
F = df1['Frequency'].iloc[0:8]

# Calculating percentage (share of each value among the kept ones)
Frequency_hours = F * 100 / F.sum()

In [None]:
# Defining the labels, one per slice
# (the second label used to read '1h³' because of a stray character)
labels = ('0h', '1h', '2h', '3h', '4h', '5h', '6h', '7h')

# Defining the size of the slices: percentages computed in the previous cell
sizes = Frequency_hours

# Everything is plotted together
fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=180)
# Equal axis scaling so the pie is drawn as a circle
ax1.axis('equal')
# y=1.1 lifts the title above the axes
plt.title("Sleeping hours over the submitted results", y=1.1)
plt.show()

This pie plot shows how the reported sleeping hours are distributed, in percent, among the responses of the people interviewed, in the range from 0 to 7 hours.

### Line plot

In [None]:
# Figure that hosts both axes
fig = plt.figure(figsize=(10, 6))

# Large axes for the temperature curve and a smaller axes placed inside it;
# each list is [left, bottom, width, height] in figure fractions
ax1 = fig.add_axes([0, 0, 2, 2])
ax2 = fig.add_axes([0.1, 1.5, 1, 0.5])

# Row windows of the two data series that are drawn
temperature_rows = slice(99, 141)
ice_rows = slice(0, 401)

# Large graph: annual temperature difference
ax1.set_title("Temperature differences and Arctic sea ice extent")
ax1.plot(
    df['Year for temperature difference'].iloc[temperature_rows],
    df['Annual temp difference (°C)'].iloc[temperature_rows],
    color="red",
)
ax1.set_xlabel("Year")
ax1.set_ylabel("relative °C")

# Inset graph: Arctic sea ice extent
ax2.plot(
    df['Year for Arctic sea ice extent'].iloc[ice_rows],
    df['Arctic sea ice extent (million square km)'].iloc[ice_rows],
    color="lightblue",
)
ax2.set_xlabel("Year")
ax2.set_ylabel("million square km")

plt.show()

These line plots give us information about the relationship between the temperature differences and the Arctic sea ice extent from 1979 to 2020. They clearly show that as the temperature rises, the ice extent of the Arctic sea decreases.

In [None]:
# One wide figure with a common headline
fig = plt.figure(figsize=(20, 8))
fig.suptitle("Environment", fontsize=18)

# Row windows of the temperature and the ice series
temperature_rows = slice(99, 141)
ice_rows = slice(0, 401)

# Top-left panel: raw (solid) and smoothed (dashed) temperature difference
ax1 = fig.add_subplot(221)
ax1.set_title("Annual temperature difference")
temperature_years = df['Year for temperature difference'].iloc[temperature_rows]
ax1.plot(
    temperature_years,
    df['Annual temp difference (°C)'].iloc[temperature_rows],
    color="red",
)
ax1.plot(
    temperature_years,
    df['Annual temp difference smooth (°C)'].iloc[temperature_rows],
    color="red",
    linestyle="dashed",
)
ax1.set_xlabel("Year")
ax1.set_ylabel("relative °C")
# Horizontal reference line at zero difference
ax1.axhline(y=0, linewidth=1, color='darkred')

# Top-right panel: Arctic sea ice extent
ax2 = fig.add_subplot(222)
ax2.set_title("Arctic sea ice extent")
ax2.plot(
    df['Year for Arctic sea ice extent'].iloc[ice_rows],
    df['Arctic sea ice extent (million square km)'].iloc[ice_rows],
    color="lightblue",
)
ax2.set_xlabel("Year")
ax2.set_ylabel("million square km")

# Bottom panel across the full width: last 496 CO2 rows
ax3 = fig.add_subplot(212)
ax3.set_title("CO2 emission")
date = pd.to_datetime(df["Year.Month for CO2"], format="%Y.%m.%d")
ax3.plot(
    date.iloc[-496:],
    df['CO2 (ppm)'].iloc[-496:],
    color="brown",
)
ax3.set_xlabel("Year")
ax3.set_ylabel("CO2 (ppm)")

plt.show()

The line plots above display environmental observations of the temperature differences, the Arctic sea ice extent and the CO2 emission from 1979 to 2020. As you can see, all three trends are interrelated.

### Boxplot and Violinplot

In [None]:
# Filtering data: remove the missing entries of every plotted column
fruit = df["Daily fruit/veggies"]
data1 = fruit.dropna()

stress = df["Daily stress (0-5)"]
data2 = stress.dropna()

friends = df["Number of close friends"]
data3 = friends.dropna()

social = df["Number of social interactions per day"]
data4 = social.dropna()

sleep = df["Sleeping hours"]
data5 = sleep.dropna()

# One colour and one tick label per box, in plotting order
colors = ['springgreen', 'turquoise', 'darkblue', 'cyan', 'lightblue']
labels = ["Fruit", "Stress", "Friends", "Social interaction", "Sleeping"]

# Assigning the boxplot data
plt.figure(figsize=(10, 6))
box_plot_data = [data1, data2, data3, data4, data5]

# patch_artist=True makes the boxes fillable; notch=True draws notched boxes
bp = plt.boxplot(
    box_plot_data,
    labels=labels,
    patch_artist=True,
    notch=True,
)

# Fill every box with its colour and tint the cap at index 2*i + 1
# (the second cap belonging to box i) in the same colour
for position, (box, shade) in enumerate(zip(bp['boxes'], colors)):
    box.set(facecolor=shade)
    bp['caps'][2 * position + 1].set(color=shade)

plt.show()

The boxplot above gives information about the amount of daily fruit/veggies, the daily stress, the number of close friends, the number of social interactions per day and the sleeping hours. Each box extends from the lower to the upper quartile of the given data, with a line at the median. The whiskers extend from the box to show the range of the data.

In [None]:
# Plotting violinplot of the five filtered columns, medians marked
plt.figure(figsize=(10, 6))
violin_data = [data1, data2, data3, data4, data5]
vp = plt.violinplot(violin_data, showmedians=True)

# Violins sit at x = 1..5; label them like the boxes of the boxplot
plt.xticks([1, 2, 3, 4, 5], labels)

# Colour every violin body with the matching entry of the colour list
for body, shade in zip(vp['bodies'], colors):
    body.set(facecolor=shade)

plt.show()

The violin plot above shows the same data as the boxplot before it, but in a different form: in addition to the median, each violin displays how the values are distributed over the whole range of the data.

### Stack plot

In [None]:
# Applying the rolling mean (window of 5, at least 1 value) to four
# consecutive chunks of 175 CO2 rows each: 0-175, 175-350, 350-525, 525-700
co2 = df['CO2 (ppm)']
rmean1, rmean2, rmean3, rmean4 = (
    co2[start:start + 175].rolling(window=5, min_periods=1).mean()
    for start in range(0, 700, 175)
)

# Defining the labels
labels = [
    'CO2 emission 1958-1972',
    'CO2 emission 1972-1987',
    'CO2 emission 1987-2001',
    'CO2 emission 2001-2016',
]

# Defining the colors
colors = [
    'paleturquoise',
    'turquoise',
    'darkturquoise',
    'lightblue',
]

plt.figure(figsize=(12, 6))

# The x values of the first chunk serve as the shared axis for all layers
x = df['Year.Month for CO2'][0:175]
# One row per chunk, stacked on top of each other in the plot
y = np.vstack((rmean1, rmean2, rmean3, rmean4))

plt.stackplot(x, y, labels=labels, colors=colors, edgecolor='white')
# The x tick labels only belong to the first chunk, so they are hidden
plt.tick_params(labelbottom=False)
plt.title('CO2 emission over 14 years periods')
plt.xlabel('14 years period')
plt.ylabel('CO2 (ppm)')
plt.legend(loc='lower center')
plt.tight_layout()
plt.show()

This stack plot displays the CO2 emission over 14-year periods. It can be seen that the level of emissions has shown an increasing trend over the years.

### Stem plot

In [None]:
# Comparing parts of data about the sea ice extent: each difference is a
# block of ten rows minus the block of ten rows right before it
ice_extent = df['Arctic sea ice extent (million square km)'].to_numpy()
ice_diff_9989_8979 = ice_extent[10:20] - ice_extent[0:10]
ice_diff_0999_9989 = ice_extent[20:30] - ice_extent[10:20]
ice_diff_1909_0999 = ice_extent[30:40] - ice_extent[20:30]

# Position 0..9 inside a ten-row block, used as x values of the stem plots
years = list(range(10))

In [None]:
plt.figure(figsize=(10, 6))
plt.suptitle('Arctic sea ice extent between 10 years periods', y=1.1, fontsize=15)

# One entry per subplot: grid position, plotted differences and title.
# The first title used to read "1989-1979"; the earlier period is 1979-1989.
stem_panels = [
    (311, ice_diff_9989_8979, 'Between the period 1989-1999 and 1979-1989'),
    (312, ice_diff_0999_9989, 'Between the period 1999-2009 and 1989-1999'),
    (313, ice_diff_1909_0999, 'Between the period 2009-2019 and 1999-2009'),
]

# All three subplots share the same styling, so they are drawn in one loop
for grid_position, differences, panel_title in stem_panels:
    plt.subplot(grid_position)
    plt.stem(
        years,
        differences,
        markerfmt='g_',   # green horizontal-line markers
        linefmt='b--',    # blue dashed stems
        basefmt='r-',     # red solid baseline
    )
    plt.title(panel_title)
    plt.xlabel('Years')
    plt.ylabel('Ice extent difference')

plt.tight_layout()
plt.show()

The stem plot above shows the difference in Arctic sea ice extent between consecutive 10-year periods. It can be seen that, for the most part, the extent has decreased.