In [1]:
# import the libraries used in this notebook
# matplotlib, seaborn and plotly for plots & visualizations
# datetime for date/time type conversion
# sklearn for linear regression and other types of models

import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from datetime import timedelta
from sklearn.linear_model import LinearRegression
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go


In [2]:
# load the census CSV straight from its raw GitHub URL into a DataFrame

CENSUS_CSV_URL = 'https://raw.githubusercontent.com/Tinashe-04/InternationalStudentsCensusData/main/fixedCensusData.csv'
data = pd.read_csv(CENSUS_CSV_URL)
data

Unnamed: 0.1,Unnamed: 0,Place of Origin,1949/50,1954/55,1959/60,1964/65,1969/70,1974/75,1979/80,1984/85,...,2014/15,2015/16,2016/17,2017/18,2018/19,2019/20,2020/21,2021/22,2022/23,% Change
0,0,Burundi,0,0,0,6,6,5,12,32,...,165,148,180,134,141,164,149,157,194,23.6
1,1,Djibouti,0,0,0,0,0,1,2,16,...,7,9,10,9,9,6,6,6,10,66.7
2,2,Eritrea,0,0,0,0,0,0,0,0,...,108,109,138,151,117,81,53,77,73,-5.2
3,3,Ethiopia,37,62,170,266,540,2050,1340,2120,...,1472,1517,1847,2118,2061,2356,2166,2680,3006,12.2
4,4,Kenya,0,19,156,774,492,870,1850,1890,...,3072,3019,3189,3322,3451,3710,3502,3799,4059,6.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236,236,Tuvalu,0,0,0,0,0,0,0,0,...,7,4,3,4,4,4,6,1,3,200.0
237,237,Vanuatu,0,0,0,0,0,0,0,2,...,8,9,11,14,15,13,11,11,8,-27.3
238,238,Wallis and Futuna,0,0,0,0,0,0,0,0,...,18,4,2,0,2,0,3,9,0,-100.0
239,239,"Pacific Islands, Unspecified",0,15,38,145,327,1263,2033,1686,...,0,0,0,0,0,0,0,0,0,0.0


In [3]:
# Inspect the column labels of the loaded frame: besides 'Place of Origin',
# the academic-year columns and '% Change', there is a stray 'Unnamed: 0' column.
data.columns

Index(['Unnamed: 0', 'Place of Origin', '1949/50', '1954/55', '1959/60',
       '1964/65', '1969/70', '1974/75', '1979/80', '1984/85', '1989/90',
       '1994/95', '1999/00', '2000/01', '2001/02', '2002/03', '2003/04',
       '2004/05', '2005/06', '2006/07', '2007/08', '2008/09', '2009/10',
       '2010/11', '2011/12', '2012/13', '2013/14', '2014/15', '2015/16',
       '2016/17', '2017/18', '2018/19', '2019/20', '2020/21', '2021/22',
       '2022/23', '% Change'],
      dtype='object')

In [4]:
# Drop the leftover 'Unnamed: 0' column (in the rows displayed it just
# repeats the row index, so it carries no information).
# The frame is rebound rather than mutated in place, and errors='ignore'
# makes the drop a no-op when the column is already gone, so this cell can be
# re-run without raising a KeyError.
data = data.drop(columns='Unnamed: 0', errors='ignore')
data

Unnamed: 0,Place of Origin,1949/50,1954/55,1959/60,1964/65,1969/70,1974/75,1979/80,1984/85,1989/90,...,2014/15,2015/16,2016/17,2017/18,2018/19,2019/20,2020/21,2021/22,2022/23,% Change
0,Burundi,0,0,0,6,6,5,12,32,54,...,165,148,180,134,141,164,149,157,194,23.6
1,Djibouti,0,0,0,0,0,1,2,16,19,...,7,9,10,9,9,6,6,6,10,66.7
2,Eritrea,0,0,0,0,0,0,0,0,0,...,108,109,138,151,117,81,53,77,73,-5.2
3,Ethiopia,37,62,170,266,540,2050,1340,2120,2020,...,1472,1517,1847,2118,2061,2356,2166,2680,3006,12.2
4,Kenya,0,19,156,774,492,870,1850,1890,2200,...,3072,3019,3189,3322,3451,3710,3502,3799,4059,6.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236,Tuvalu,0,0,0,0,0,0,0,0,1,...,7,4,3,4,4,4,6,1,3,200.0
237,Vanuatu,0,0,0,0,0,0,0,2,1,...,8,9,11,14,15,13,11,11,8,-27.3
238,Wallis and Futuna,0,0,0,0,0,0,0,0,0,...,18,4,2,0,2,0,3,9,0,-100.0
239,"Pacific Islands, Unspecified",0,15,38,145,327,1263,2033,1686,1028,...,0,0,0,0,0,0,0,0,0,0.0


In [5]:
# Check the dtype pandas inferred for each column, to confirm the numeric
# columns were parsed as numbers rather than left as strings.
data.dtypes

Place of Origin     object
1949/50              int64
1954/55              int64
1959/60              int64
1964/65              int64
1969/70              int64
1974/75              int64
1979/80              int64
1984/85              int64
1989/90              int64
1994/95              int64
1999/00              int64
2000/01              int64
2001/02              int64
2002/03              int64
2003/04              int64
2004/05              int64
2005/06              int64
2006/07              int64
2007/08              int64
2008/09              int64
2009/10              int64
2010/11              int64
2011/12              int64
2012/13              int64
2013/14              int64
2014/15              int64
2015/16              int64
2016/17              int64
2017/18              int64
2018/19              int64
2019/20              int64
2020/21              int64
2021/22              int64
2022/23              int64
% Change           float64
dtype: object

In [6]:
# World choropleth of international student figures for one academic year.
# Each row is placed on the map by matching its 'Place of Origin' value
# against country names.
# NOTE(review): rows such as "Pacific Islands, Unspecified" are not country
# names and presumably will not be drawn - confirm this is acceptable.

MAP_YEAR = '2022/23'  # academic-year column used to colour the map

fig = px.choropleth(
    data_frame=data,
    locations='Place of Origin',
    locationmode='country names',
    color=MAP_YEAR,
    # readable colorbar/hover label instead of the bare column name
    labels={MAP_YEAR: f'Students ({MAP_YEAR})'},
    # a title so the figure stands on its own when the notebook is skimmed
    title=f'International students by place of origin, {MAP_YEAR}',
)
fig.show()