# Notebook Details  
### This notebook reads the CSV files listed below and cleans them to produce (1) the yearly migration into BC from each other province and territory, and (2) immigration and net-migration details for British Columbia, the Fraser Valley, and Greater Vancouver.
### Read Data:  
-'Data/uncleaned/interprovincial_migration_annual.csv'  
-'Data/uncleaned/regional_district_migration.csv'  
  

### Output Data:  
-'Data/cleaned/yearly_provincial_migration_into_bc.csv'  
-'Data/cleaned/yearly_provincial_migration_percentage_into_bc.csv'  
-'Data/cleaned/greater_vancouver_immigration.csv'

In [1]:
import pandas as pd

# Raw annual interprovincial migration table: one row per (Year, Origin),
# one column per destination province/territory plus a 'Total' column.
# NOTE(review): the variable name says "housing prices" but the file holds
# migration data; the name is kept so any cell that refers to it still works.
housing_prices_path = 'Data/uncleaned/interprovincial_migration_annual.csv'
df = pd.read_csv(housing_prices_path)

# Preview the first rows (head() defaults to 5) to confirm the layout.
print(df.head())

   Year Origin  N.L.  P.E.I.  N.S.  N.B.  Que.  Ont.  Man.  Sask.  Alta.  \
0  1971  Alta.   355     143   828   456  1054  6780  2574   4292      0   
1  1971   B.C.   284      73   900   565  1285  7781  2512   2274  11301   
2  1971   Man.   197     182   335   423  1126  7082     0   2953   4406   
3  1971   N.B.   830     473  2350     0  2124  5242   230    119    585   
4  1971   N.L.     0      89   953   678   650  4150    93     46    195   

    B.C.  Y.T.  N.W.T.  Nvt.  Total  
0  17717   607     952     0  35758  
1      0   460     724     0  28159  
2   5008    85     133     0  21930  
3    787    12      19     0  12771  
4    295     9      13     0   7171  


In [6]:
# Every origin/destination other than B.C. itself; these become the
# per-origin columns of the yearly summary.
provinces = ['N.L.', 'P.E.I.', 'N.S.', 'N.B.', 'Que.', 'Ont.', 'Man.', 'Sask.', 'Alta.', 'Y.T.', 'N.W.T.', 'Nvt.']
columns = ['Year', 'Total_migration_into_BC', 'Total_migration_out_of_BC'] + provinces
years = df['Year'].unique()
aggregated_data = []

for yr in years:
    # All origin rows recorded for this year.
    rows_for_year = df[df['Year'] == yr]
    origin = rows_for_year['Origin']

    # Inflow: the 'B.C.' destination column summed over every non-B.C. origin.
    inflow_total = rows_for_year.loc[origin.ne('B.C.'), 'B.C.'].sum()

    # Outflow: the B.C. origin row(s) summed across all other destinations
    # (first sum is per destination column, second collapses to one number).
    outflow_total = rows_for_year.loc[origin.eq('B.C.'), provinces].sum().sum()

    record = {
        'Year': yr,
        'Total_migration_into_BC': inflow_total,
        'Total_migration_out_of_BC': outflow_total,
    }
    # Number of people who moved from each individual origin into B.C.
    record.update({
        source: rows_for_year.loc[origin == source, 'B.C.'].sum()
        for source in provinces
    })
    aggregated_data.append(record)

# One row per year; `columns` fixes the column order of the output.
aggregated_df = pd.DataFrame(aggregated_data, columns=columns)
print(aggregated_df.head())

aggregated_df.to_csv('Data/cleaned/yearly_provincial_migration_into_bc.csv', index=False)


   Year  Total_migration_into_BC  Total_migration_out_of_BC  N.L.  P.E.I.  \
0  1971                    48452                      28159   295     103   
1  1972                    72338                      47411   505     133   
2  1973                    87092                      56555   785     217   
3  1974                    84173                      61518   847     230   
4  1975                    61136                      64000   617     177   

   N.S.  N.B.  Que.   Ont.  Man.  Sask.  Alta.  Y.T.  N.W.T.  Nvt.  
0  1702   787  3196  12195  5008   6217  17717   481     751     0  
1  1890  1192  5146  17281  7901   9076  26944   885    1385     0  
2  2120  1453  4989  23499  8643   9883  32979   985    1539     0  
3  2882  1460  5484  24169  8650   8486  29382  1007    1576     0  
4  1926   976  4077  18201  6287   4982  22027   727    1139     0  


In [7]:
province_columns = ['N.L.', 'P.E.I.', 'N.S.', 'N.B.', 'Que.', 'Ont.', 'Man.', 'Sask.', 'Alta.', 'Y.T.', 'N.W.T.', 'Nvt.']

# Express each origin's inflow as a percentage of that year's total
# migration into BC.
#
# The percentages are written to a separate frame instead of overwriting
# `aggregated_df`: the in-place version was not idempotent (re-running the
# cell divided the already-converted percentages by the total again and
# saved wrong values), and it discarded the raw counts that the previous
# cell displayed and saved.
percentage_df = aggregated_df.copy()
for col in province_columns:
    percentage_df[col] = (aggregated_df[col] / aggregated_df['Total_migration_into_BC']) * 100


print(percentage_df.head())

percentage_df.to_csv('Data/cleaned/yearly_provincial_migration_percentage_into_bc.csv', index=False)


   Year  Total_migration_into_BC  Total_migration_out_of_BC      N.L.  \
0  1971                    48452                      28159  0.608850   
1  1972                    72338                      47411  0.698112   
2  1973                    87092                      56555  0.901346   
3  1974                    84173                      61518  1.006261   
4  1975                    61136                      64000  1.009225   

     P.E.I.      N.S.      N.B.      Que.       Ont.       Man.      Sask.  \
0  0.212582  3.512755  1.624288  6.596219  25.169240  10.336003  12.831256   
1  0.183859  2.612735  1.647820  7.113827  23.889242  10.922337  12.546656   
2  0.249162  2.434208  1.668351  5.728425  26.981812   9.923988  11.347770   
3  0.273247  3.423901  1.734523  6.515153  28.713483  10.276454  10.081618   
4  0.289518  3.150353  1.596441  6.668739  29.771329  10.283630   8.149045   

       Alta.      Y.T.    N.W.T.  Nvt.  
0  36.566086  0.992735  1.549988   0.0  
1  37.2473

https://catalogue.data.gov.bc.ca/dataset/migration-by-development-region-and-regional-district   
Migration by Development Region and Regional District  
https://catalogue.data.gov.bc.ca/dataset/migration-by-development-region-and-regional-district/resource/726e54aa-f403-42d2-90b1-72c9a8c8e84f   
regional_district_migration 

In [8]:
imigration_info = 'Data/uncleaned/regional_district_migration.csv'

# Regions kept in the cleaned output.
regions_of_interest = ['British Columbia', 'Fraser Valley', 'Greater Vancouver']
# Columns that are not needed downstream (these are columns, not rows).
unused_columns = [
    'SGC',
    'Returning Emigrants',
    'Interprovincial In',
    'Interprovincial Out',
    'Intraprovincial In',
    'Intraprovincial Out',
]

imi_df = pd.read_csv(imigration_info)
# Keep only the selected regions, then drop the unused columns.
imi_df = imi_df.loc[imi_df['Region'].isin(regions_of_interest)].drop(columns=unused_columns)
print(imi_df.head())

imi_df.to_csv('Data/cleaned/greater_vancouver_immigration.csv', index=False)


   Census Year             Region  Immigrants  Emigrants  \
0    2001-2002   British Columbia       38024      11958   
5    2001-2002      Fraser Valley        1239        576   
6    2001-2002  Greater Vancouver       34458       7380   
30   2002-2003   British Columbia       31924      12837   
35   2002-2003      Fraser Valley        1126        486   

    Net Temporary Emigrants  Net Non-permanent Residents  Net International  \
0                      4925                         8155              33197   
5                       235                          223                822   
6                      3039                         6403              32838   
30                     4923                         6345              25375   
35                      188                          176                952   

    Net Interprovincial  Net Intraprovincial  Net Total Migration  
0                 -8556                    0                24641  
5                  -484     