### **Course Content**
##### Day 72 of 100 Days of Python
##### Project Name: Analyzing Programming Languages
##### Things I implemented: Pandas, Matplotlib

# Preliminary Data Exploration

In [1]:
import pandas as pd # import module

In [2]:
# Load the survey CSV. header=0 marks the file's first row as a header so that
# the names given here replace it instead of being added on top of it.
column_names = ['Date', 'Tag', 'Posts']
df = pd.read_csv(
    filepath_or_buffer='sample_data/programming_languages_survey.csv',
    header=0,
    names=column_names,
)

In [None]:
df # display the frame to confirm the columns are now named Date, Tag and Posts

In [None]:
df.head(n=5)  # preview the first five rows

In [None]:
df.tail(n=5)  # preview the last five rows

In [None]:
df.shape # (number of rows, number of columns) of the dataframe

In [None]:
df.count() # number of non-missing entries in each column

# Analysis of Programming Languages

In [None]:
# Rows recorded per programming language (non-missing entries in each column);
# presumably one row per month, so this is the number of months with data.
df.groupby(by='Tag').count()

In [None]:
# Total number of posts per tag.
# Select 'Posts' explicitly so 'Date' is never aggregated: with pandas >= 2.0
# summing the string dates concatenates them, and once the column has been
# converted to datetime the sum raises a TypeError. The double brackets keep
# the result a one-column DataFrame indexed by Tag.
df.groupby('Tag')[['Posts']].sum()

# Data Cleaning: Working with timestamps

In [19]:
# Parse the Date column (read from the CSV as strings) into datetime values
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [None]:
df.head(n=5)  # first five rows, to check how the Date column looks now

# Data Manipulation: Pivoting Dataframes

In [None]:
# CHALLENGE: build a new dataframe named reshaped_df with one column per
# programming language (Tag), Date as the index, and Posts as the cell values.

reshaped_df = df.pivot(
    index='Date',
    columns='Tag',
    values='Posts',
)
reshaped_df.head(n=5)

In [None]:
reshaped_df.tail(n=5)  # last five rows of the pivoted frame

In [57]:
# Replace missing values with 0 (a Date/Tag pair with no row in df comes out of
# the pivot as NaN). Rebind the name instead of using inplace=True, which has
# no performance benefit and prevents method chaining.
reshaped_df = reshaped_df.fillna(0)

In [59]:
reshaped_df.shape # (rows, columns) of the pivoted dataframe: one row per Date, one column per Tag

(145, 14)

In [None]:
reshaped_df.count() # number of non-missing entries in each language column

In [61]:
reshaped_df.columns  # column labels of the pivoted frame: one per distinct Tag value

Index(['assembly', 'c', 'c#', 'c++', 'delphi', 'go', 'java', 'javascript',
       'perl', 'php', 'python', 'r', 'ruby', 'swift'],
      dtype='object', name='Tag')

# Data Visualization with Matplotlib

In [62]:
import matplotlib.pyplot as plt # I can also put it at the top to make it more readable, but i don't really care right now

In [None]:
# Plot a single programming language (java) using the explicit Axes interface
fig, ax = plt.subplots(figsize=(12, 16))
ax.set_xlabel('Date', fontsize=15)
ax.set_ylabel('Number of Posts', fontsize=15)
ax.plot(reshaped_df.index, reshaped_df['java'])

In [None]:
# Plotting Python and Java on the same axes
plt.figure(figsize=(12,16))
plt.xlabel('Date', fontsize=15)
plt.ylabel('Number of Posts', fontsize=15)

# Label each line with its column name; without labels and a legend the two
# curves cannot be told apart.
for language in ('java', 'python'):
    plt.plot(reshaped_df.index, reshaped_df[language], label=language)

plt.legend(fontsize=15)

In [None]:
# Plot every language, smoothed

# Smooth each column with a 12-row rolling mean. The first 11 rows have no
# complete window, so they are NaN in roll_df.
roll_df = reshaped_df.rolling(window=12).mean()

# Create the figure with the chosen size and get its Axes
fig, ax = plt.subplots(figsize=(12, 16))

# Axis labels
ax.set_xlabel('Date', fontsize=15)
ax.set_ylabel('Number of Posts', fontsize=15)

# One line per language, labelled with its column name for the legend
for language in roll_df.columns:
    ax.plot(roll_df.index, roll_df[language], linewidth=2, label=language)

# The legend maps each line colour to its language
ax.legend(fontsize=15)