In [7]:
import pandas as pd
import csv
import matplotlib.pyplot as plt

In [8]:


# Read the CSV file including all columns
csv_file_path = '/Users/brandoncooke/Public/NEW REP SPACE/US_Southern_Border_Metrics/Data/DATA FOR BORDER MAP/Sheet 1.csv'
df = pd.read_csv(csv_file_path, sep='\t', encoding='utf-16')

# Exclude any unnamed columns
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]


column_names = ['Country', 'Country_From', 'Direction', 'From', 'The','Blank', 'Latitude', 'Longitude', 'Population_Label', 'Migrants_Label', 'SQRT_Migrants', 'Sum_Total_Country_1', "Sum_Total_Country_2"]

# Assign the adjusted column names to the DataFrame
df.columns = column_names

df.head(10)

Unnamed: 0,Country,Country_From,Direction,From,The,Blank,Latitude,Longitude,Population_Label,Migrants_Label,SQRT_Migrants,Sum_Total_Country_1,Sum_Total_Country_2
0,United States,,,,,,40.0792,-98.8164,,,,251703000,7594950000
1,United States,Afghanistan,in,from,the,,34.023,65.5267,Migrants in country (immigrants),143000.0,377.497019856,5854000,50633000
2,United States,Albania,in,from,the,,40.654,20.076,Migrants in country (immigrants),97000.0,311.852529251,1250000,50633000
3,United States,Algeria,in,from,the,,28.6045,2.64,Migrants in country (immigrants),29000.0,168.911219284,2022000,50633000
4,United States,Angola,in,from,the,,-12.836,17.808,Migrants in country (immigrants),20000.0,142.986013302,668000,50633000
5,United States,Antigua and Barbuda,in,from,the,,17.625,-61.786,Migrants in country (immigrants),46000.0,214.562345252,67000,50633000
6,United States,Argentina,in,from,the,,-33.166,-64.31,Migrants in country (immigrants),219000.0,468.452772433,1076000,50633000
7,United States,Armenia,in,from,the,,40.56,44.449,Migrants in country (immigrants),87000.0,295.79384713,958000,50633000
8,United States,Aruba,in,from,the,,12.5176,-69.9818,Migrants in country (immigrants),10000.0,97.857038582,21000,50633000
9,United States,Australia,in,from,the,,-24.578,133.582,Migrants in country (immigrants),103000.0,321.630222461,599000,50633000


In [9]:
print(df.columns)

Index(['Country', 'Country_From', 'Direction', 'From', 'The', 'Blank',
       'Latitude', 'Longitude', 'Population_Label', 'Migrants_Label',
       'SQRT_Migrants', 'Sum_Total_Country_1', 'Sum_Total_Country_2'],
      dtype='object')


In [10]:
import pandas as pd
import plotly.express as px


migrants_df = df[df['Country_From'].notna()].copy()

# Convert 'Sum_Total_Country_1' column to numeric
migrants_df['Migrants_Label'] = pd.to_numeric(migrants_df['Migrants_Label'].str.replace(',', ''), errors='coerce')

# Create a scatter plot using Plotly
fig = px.scatter_geo(migrants_df, 
                     lon='Longitude', 
                     lat='Latitude', 
                     hover_name='Country_From',
                     color_discrete_sequence=['blue'],
                     size='Migrants_Label',  # Use 'Sum_Total_Country_1' for size
                     projection='orthographic')  


fig.update_layout(
    title='Migrants to the United States from Other Countries',
    geo=dict(
        landcolor='rgb(230, 230, 230)',  
        showocean=True,  
        oceancolor='rgb(173, 216, 230)', 
        showland=True,  
        showlakes=True,  
        lakecolor='rgb(173, 216, 230)',  
        showrivers=True,  
        rivercolor='rgb(173, 216, 230)',  
        showframe=False,  
        showcoastlines=True, 
        showcountries=True,
        countrywidth=1, 
    ),
    width=1000,
    height=1000
)

fig.show()

# Save the plot as an HTML file
fig.write_html("migrants_map.html")


In [11]:
import pandas as pd
import plotly.graph_objects as go

# Filter out rows where migrants are from other countries
migrants_df = df[df['Country_From'].notna()]

# Convert 'Migrants_Label' column to numeric
migrants_df['Migrants_Label'] = pd.to_numeric(migrants_df['Migrants_Label'].str.replace(',', ''), errors='coerce')

# Create a Plotly figure
fig = go.Figure(go.Scattergeo(
    lon = migrants_df['Longitude'],
    lat = migrants_df['Latitude'],
    text = migrants_df['Country_From'],
    marker = dict(
        color = migrants_df['Migrants_Label'], # Set color equal to migrant label
        colorscale = 'Viridis', # Choose a colorscale
        reversescale = True, # Reverse colorscale
        cmin = 0, # Set minimum value of the color scale
        cmax = migrants_df['Migrants_Label'].max(), # Set maximum value of the color scale
        colorbar_title = 'Migrants' # Set color bar title
    )
))

# Customize layout
fig.update_layout(
    title = 'Migrants to the United States from Other Countries (as of 20240101)',
    geo=dict(
        projection_type='orthographic',
        landcolor = 'rgb(230, 230, 230)',
        coastlinecolor = 'rgb(160, 160, 160)',
        showocean=True,  # Show ocean
        oceancolor='rgb(173, 216, 230)',  # Color of the ocean
        showland=True,  # Show land
        showlakes=True,  # Show lakes
        lakecolor='rgb(173, 216, 230)',  # Color of the lakes
        showrivers=True,  # Show rivers
        rivercolor='rgb(173, 216, 230)',  # Color of the rivers
        showframe=False,  # Hide frame around the map
        showcoastlines=True,  # Show coastlines
        showcountries=True,  # Show country boundaries
        countrywidth=1,  # Width of country boundaries
    ),
    width=1000,  # Set the width of the plot
    height=800,  # Set the height of the plot
)

# Show the plot
fig.show()

fig.write_html("New_migrants_map.html")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

