In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from __future__ import print_function #adds compatibility with new versions of Python
%matplotlib inline 



In [None]:
# Read the "Canada by Citizenship" sheet of the workbook, ignoring the first
# 20 rows and the last 2 rows of the sheet.
# NOTE(review): the path below is an absolute, machine-specific location;
# it has to be adjusted before the notebook can run anywhere else.
excel_path = 'C:/Users/miant/Desktop/Python/Canada.xlsx'
read_options = {
    'sheet_name': "Canada by Citizenship",
    'skiprows': range(20),
    'skipfooter': 2,
}
dataset = pd.read_excel(excel_path, **read_options)

# Preview the first five rows
dataset.head(5)

In [None]:
# Remove the 'AREA', 'REG' and 'DEV' columns and rebind the result to `dataset`
unused_columns = ['AREA', 'REG', 'DEV']
dataset = dataset.drop(columns=unused_columns)
dataset.head()

In [None]:
# Total per row: add up every numeric column.
# - numeric_only=True keeps the text columns out of the sum; recent pandas
#   versions no longer drop them silently and raise a TypeError instead.
# - An already existing 'Total' column is excluded first, so re-running this
#   cell does not add the previous total into the new one.
dataset['Total'] = (
    dataset
    .drop(columns='Total', errors='ignore')
    .sum(axis=1, numeric_only=True)
)
dataset.head()

In [None]:
# Use the 'OdName' column as the row index so rows can be selected by its values
dataset = dataset.set_index('OdName')

In [None]:
# Inspect the complete record stored under the 'Haiti' index label
haiti_row = dataset.loc['Haiti']
print(haiti_row)

In [None]:
# Look up a single value: the 'Haiti' row at the column labelled 2013.
# The label is the integer 2013 here; the column labels are only turned into
# strings in a later cell.
haiti_2013 = dataset.loc['Haiti', 2013]
print(haiti_2013)

In [None]:
#Converting the years to string
# Every column label is passed through str(), so the year labels become
# strings like the remaining labels.
dataset.columns = [str(label) for label in dataset.columns]

# Print the type of each label to confirm the conversion. A plain loop is used
# instead of a list comprehension so the cell does not also echo a list of
# None values as its output.
for label in dataset.columns:
    print(type(label))

In [None]:
#LINE PLOT - IMMIGRATION FROM HAITI

# Column labels for 1980 through 2013, as strings
years = [str(year) for year in range(1980, 2014)]

# Draw the 'Haiti' row across those columns as a red line
haiti_by_year = dataset.loc['Haiti', years]
ax = haiti_by_year.plot(kind='line', color='red')
ax.set_title('Immigrants from Haiti')
ax.set_ylabel('Number of immigrants')
ax.set_xlabel('Years')


In [None]:
#ADD TEXT TO EXPLAIN THE SPIKE

# Column labels for 1980 through 2013, as strings
years = [str(year) for year in range(1980, 2014)]

# Same red line as before for the 'Haiti' row
ax = dataset.loc['Haiti', years].plot(kind='line', color='red')
ax.set_title('Immigrants from Haiti')
ax.set_ylabel('Number of immigrants')
ax.set_xlabel('Years')

# Annotation placed at x=20, y=6000.
# NOTE(review): x presumably counts positions along the string-labelled year
# axis rather than being a year value - confirm on the rendered figure.
ax.text(20, 6000, '2010 Earthquake')

In [None]:
# Draw both countries as lines in the same figure, one colour each
country_colors = [('Haiti', 'red'), ('Argentina', 'blue')]
for country, line_color in country_colors:
    dataset.loc[country, years].plot(kind='line', color=line_color)

plt.title('Immigrants from Haiti vs Argentina')
plt.ylabel('Number of immigrants')
plt.xlabel('Years')

# Legend entries follow the order in which the lines were drawn
plt.legend([country for country, _ in country_colors])

In [None]:
#PIE CHART

# Sum the numeric columns for each 'AreaName' group.
# - The groupby `axis` argument is omitted: grouping rows is the default and
#   the keyword is deprecated in recent pandas releases.
# - numeric_only=True restricts the aggregation to numeric columns, so text
#   columns are not aggregated along with the counts.
data_continents = dataset.groupby('AreaName').sum(numeric_only=True)

# One slice per area, sized by its 'Total'
data_continents['Total'].plot(kind='pie')

plt.title('Immigration by Areas [1980-2013]')
plt.tight_layout()

In [None]:
#PIE CHART

# Sum the numeric columns for each 'AreaName' group.
# - The groupby `axis` argument is omitted: grouping rows is the default and
#   the keyword is deprecated in recent pandas releases.
# - numeric_only=True restricts the aggregation to numeric columns, so text
#   columns are not aggregated along with the counts.
data_continents = dataset.groupby('AreaName').sum(numeric_only=True)

# Larger pie with each slice labelled by its percentage (one decimal place)
data_continents['Total'].plot(kind='pie', radius=1.2, autopct='%1.1f%%')

plt.title('Immigration by Areas [1980-2013]', fontsize = 12)
plt.tight_layout()

In [None]:
#PIE CHART
#Explode a slice

# Sum the numeric columns for each 'AreaName' group and display the table.
# - The groupby `axis` argument is omitted: grouping rows is the default and
#   the keyword is deprecated in recent pandas releases.
# - numeric_only=True restricts the aggregation to numeric columns, so text
#   columns are not aggregated along with the counts.
data_continents = dataset.groupby('AreaName').sum(numeric_only=True)
data_continents


In [None]:
#PIE CHART
#Explode a slice

# Sum the numeric columns for each 'AreaName' group.
# - The groupby `axis` argument is omitted: grouping rows is the default and
#   the keyword is deprecated in recent pandas releases.
# - numeric_only=True restricts the aggregation to numeric columns, so text
#   columns are not aggregated along with the counts.
data_continents = dataset.groupby('AreaName').sum(numeric_only=True)

# Offset of each slice from the centre: only the first slice is pulled out.
# The list is built from the number of groups so it always matches the number
# of slices instead of assuming exactly six areas.
explode = [0.3 if position == 0 else 0 for position in range(len(data_continents))]

data_continents['Total'].plot(kind='pie', radius=1.2, autopct='%1.1f%%', explode = explode, shadow = True)

plt.title('Immigration by Areas [1980-2013]')
plt.tight_layout()

In [None]:
#PIE CHART
#Explode a slice

# Sum the numeric columns for each 'AreaName' group.
# - The groupby `axis` argument is omitted: grouping rows is the default and
#   the keyword is deprecated in recent pandas releases.
# - numeric_only=True restricts the aggregation to numeric columns, so text
#   columns are not aggregated along with the counts.
data_continents = dataset.groupby('AreaName').sum(numeric_only=True)

# Offset of each slice from the centre: the first and third slices are pulled
# out. The list is built from the number of groups so it always matches the
# number of slices instead of assuming exactly six areas.
exploded_positions = (0, 2)
explode = [0.2 if position in exploded_positions else 0 for position in range(len(data_continents))]

data_continents['Total'].plot(kind='pie', radius=1.2, autopct='%1.1f%%', explode = explode, shadow = True)

plt.title('Immigration by Areas [1980-2013]')
plt.tight_layout()

In [None]:
#AREA PLOTS
# Reorder the rows by their 'Total' value, largest first, and rebind `dataset`
dataset = dataset.sort_values(by='Total', ascending=False)
dataset.head()

In [None]:
# Year columns of the first five rows (the five largest totals once the table
# has been sorted by 'Total'), flipped so that each of those rows becomes a
# column and each year becomes a row.
top5 = dataset.loc[:, years].iloc[:5].T
top5.head(10)

In [None]:
# Area plot of the top-5 table; alpha is the fill transparency (between 0 and 1)
area_options = {'kind': 'area', 'alpha': 0.25, 'figsize': (20, 10)}
ax = top5.plot(**area_options)

ax.set_title('Immigration Trend of Top 5 Countries', size=18)
ax.set_ylabel('Number of Immigrants', size=15)
ax.set_xlabel('Years', size=15)


In [None]:
#HORIZONTAL BAR CHART TO COMPARE IMMIGRATION - INDIA VS CHINA

# Take the year columns of the 'India' and 'China' rows, flip the table so
# each year is a row, then move the year labels out of the index into a
# regular column named 'Year'.
data_CI = (
    dataset.loc[['India', 'China'], years]
    .transpose()
    .reset_index()
    .rename(columns={'index': 'Year'})
)
data_CI.head()

In [None]:
# Horizontal bar chart: with kind='barh' the column given as x ('Year')
# supplies the category labels along the vertical axis, while the bar lengths
# (the immigrant counts) run along the horizontal axis.
ax0 = data_CI.plot(kind='barh', x='Year', figsize=(15, 10))

# Label each axis with what it actually shows: counts are horizontal and
# years are vertical (the two labels were previously swapped).
ax0.set_xlabel('Number of Immigrants India vs China', fontsize=12)
ax0.set_ylabel('Year', fontsize=12)
ax0.set_title('Immigration from top 2 countries between 1980 and 2013', fontsize=18)

In [None]:
#India bar chart

# Values of the 'India' row across the year columns
india = dataset.loc['India', years]

# One pink vertical bar per year
ax = india.plot(kind='bar', color='pink', figsize=(10, 6))
ax.set_xlabel('Years', fontsize=12)
ax.set_ylabel('Number of Immigrants', fontsize=12)
ax.set_title('Immigration from India', fontsize=15)

In [None]:
#HISTOGRAM

# Distribution of the '2013' column across all rows
immigration_2013 = dataset['2013']
ax = immigration_2013.plot(kind='hist', color='green')

ax.set_title('Immigration in 2013')
ax.set_ylabel('Number of Countries')
ax.set_xlabel('Number of Immigrants')



In [None]:
#COMPARING TWO DIFFERENT PLOTS (SUBPLOTS)

# One histogram per year column, side by side in a 1 x 2 grid
# (panel 1 on the left, panel 2 on the right).
for panel, year_label in enumerate(('2008', '2010'), start=1):
    plt.subplot(1, 2, panel)
    dataset[year_label].plot(kind='hist', color='green')
    plt.title('Immigration in ' + year_label)
    plt.ylabel('Number of Countries')
    plt.xlabel('Number of Immigrants')

# Extra padding so the two panels and their labels do not collide
plt.tight_layout(pad=3.0)

In [None]:
#BOX PLOT

# Single-column table holding the 'Japan' row's values, one row per year
japan = dataset.loc[['Japan'], years].T

ax = japan.plot(kind='box')
ax.set_title('Immigration from Japan 1980-2013')
ax.set_ylabel('Number of immigrants')

In [None]:
#MULTIPLE BOX PLOTS

fig = plt.figure(1, figsize=(12, 8))

# Two-column table ('Japan', 'Brazil'), one row per year
JB = dataset.loc[['Japan', 'Brazil'], years].T

# patch_artist=True draws the boxes as patches, so they can be filled below
box = plt.boxplot(JB, patch_artist=True)

# Fill colour for each box, in the same order as the columns of JB
colors = ['green', 'blue']
for box_patch, fill in zip(box['boxes'], colors):
    box_patch.set_facecolor(fill)

# Name the two box positions (1 and 2) after their countries
plt.xticks([1, 2], ['Japan', 'Brazil'])
plt.xlabel('Countries', size=12)
plt.ylabel('Number of Immigrants', size=12)
plt.title('Immigration Japan vs Brazil', size=18)
plt.show()

In [None]:
#SCATTER PLOT

# Build a two-column table for the scatter plot: sum every year column over
# all rows, then turn the resulting index (the year labels) into a column.
data_total = dataset[years].sum().reset_index()

# Give the two columns meaningful names
data_total.columns = ['Year', 'Total']

data_total.head()

In [None]:
# Red scatter of the yearly totals
scatter_options = {'kind': 'scatter', 'x': 'Year', 'y': 'Total', 'figsize': (10, 6), 'color': 'red'}
ax = data_total.plot(**scatter_options)

ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Number of Immigrants', fontsize=12)
ax.set_title('Total Immigration between 1980-2013', fontsize=15)

# Turn the year tick labels vertical; leave the y tick labels horizontal
plt.xticks(rotation=90)
plt.yticks(rotation=0)


In [None]:
#BUBBLE PLOT (COMPARING TWO COUNTRIES)
# Year columns of the first two rows of the table
top2 = dataset[years].iloc[:2]
top2.head(5)

In [None]:
# Take the year columns of the 'India' and 'China' rows, flip the table so
# each year is a row, then move the year labels out of the index into a
# regular column named 'Year'.
data_CI = (
    dataset.loc[['India', 'China'], years]
    .transpose()
    .reset_index()
    .rename(columns={'index': 'Year'})
)
data_CI.head()

In [None]:
#Plotting

# Settings shared by both bubble series (alpha is the transparency)
bubble_options = {'kind': 'scatter', 'x': 'Year', 'figsize': (14, 8), 'alpha': 0.5}

# India: bubble size is the yearly value divided by 50
ax0 = data_CI.plot(y='India',
                   s=data_CI['India'] / 50,
                   c='violet',
                   **bubble_options)

# China: same scaling, drawn on the axes created for India
ax1 = data_CI.plot(y='China',
                   s=data_CI['China'] / 50,
                   c='green',
                   ax=ax0,
                   **bubble_options)

plt.xticks(rotation=90)

ax0.set_xlabel('Year', fontsize=12)
ax0.set_ylabel('Number of Immigrants', fontsize=12)
ax0.set_title('Immigration India vs China from 1980 - 2013', fontsize=18)
ax0.legend(['India', 'China'], loc='upper left', fontsize=12)

In [None]:
#PRACTICE TIME! CREATE A BAR CHART COMPARING INDIA AND CHINA