In [None]:
# iMigraInsight: Revolutionizing Immigration Data Intelligence

## Introduction

#Welcome to iMigraInsight, where we embark on a journey to revolutionize immigration data intelligence. 
#Our project is driven by the vision of a more efficient and compassionate approach to collecting immigration 
#data using advanced data science and artificial intelligence. With a focus on enhancing national security, 
#protecting human rights, and informing data-driven policy development, iMigraInsight aims to address the pressing 
#challenges of modern immigration. Join us as we navigate the complexities of immigration with innovation,
#empathy, and a commitment to making a positive impact.


In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score, GridSearchCV
import joblib
import plotly.express as px
import plotly.graph_objects as go


In [None]:
#Cell 2: Load Data from Excel Files

In [None]:
# Load every Excel workbook found in the shared directory into `dataframes`.
# `pd` and `os` come from the notebook's import cell at the top.

# Directory on the LinuxONE instance where the Excel files are located
directory = '/home/jovyan/shared'

# Accept both legacy (.xls) and current (.xlsx) workbooks: a later cell reads
# an .xlsx file from this same directory, which a '.xls'-only filter skips.
excel_extensions = ('.xls', '.xlsx')

# os.listdir() returns entries in arbitrary order; sort so that the order of
# `dataframes` is reproducible across runs.
file_list = sorted(os.listdir(directory))

# One DataFrame per workbook, in sorted file-name order
dataframes = []
for file in file_list:
    # Skip Excel's "~$..." lock files (they carry a workbook extension but are
    # not readable workbooks) and anything that is not an Excel file.
    if file.startswith('~$') or not file.lower().endswith(excel_extensions):
        continue
    file_path = os.path.join(directory, file)
    df = pd.read_excel(file_path)
    dataframes.append(df)




In [None]:
#Cell 3: Data Splitting

In [None]:

# Load the "fy2021_table9d.xlsx" workbook and the tables of the border
# security metrics PDF. `pd` comes from the notebook's import cell.
import tabula

# Path to the Excel workbook
file_path = '/home/jovyan/shared/fy2021_table9d.xlsx'

# Load the workbook into a DataFrame
data_additional = pd.read_excel(file_path)

# Extract the tables from every page of the PDF report
tables = tabula.read_pdf('/home/jovyan/shared/2023_0703_plcy_fiscal_year_2022_border_security_metrics_report_2021_data.pdf', pages='all')

# Keep the extracted tables as a list.
# NOTE(review): this rebinds `dataframes`, replacing the Excel frames collected
# by the previous cell — confirm that overwrite is intended.
dataframes = list(tables)

# NOTE(review): the cell header says "Data Splitting", but no visible cell
# defines X, y, X_train, X_test, y_train or y_test, which the modelling cells
# below use — confirm where the train/test split is supposed to happen.

# Preview the first rows. This must be the last expression of the cell: a bare
# expression in the middle of a cell is evaluated but never displayed.
data_additional.head()


In [None]:
#Cell 4: Create and Train Model

In [None]:
# Random forest of 100 trees with a fixed seed for repeatable results.
# NOTE(review): X_train / y_train are not defined in the visible cells —
# confirm the train/test split runs before this cell.
model = RandomForestClassifier(random_state=42, n_estimators=100)

# Fit on the training split; as the last expression, the fitted estimator is
# also shown as the cell output.
model.fit(X=X_train, y=y_train)


In [None]:
#Cell 5: Model Evaluation

In [None]:
# Predict on the held-out split and report the accuracy of those predictions.
y_pred = model.predict(X=X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")



In [None]:
#Cell 6: Hyperparameter Tuning

In [None]:
# Grid search over tree depth and split/leaf sizes with 5-fold CV.
param_grid = dict(
    max_depth=[5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# fit() returns the search object itself, so construction and fitting chain.
grid_search = GridSearchCV(model, param_grid, cv=5).fit(X_train, y_train)

# Report the best parameter combination found by the search.
best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")


In [None]:
#Cell 7: Cross-Validation

In [None]:
# Score `model` with 5-fold cross-validation on X / y and report the mean.
# NOTE(review): X and y are not defined in the visible cells — confirm.
cross_val_scores = cross_val_score(estimator=model, X=X, y=y, cv=5)
mean_cv_score = cross_val_scores.mean()
print(f"Mean Cross-Validation Score: {mean_cv_score}")


In [None]:
#Cell 8: Save the Model

In [None]:
# Persist `model` to disk with joblib; as the last expression, the return
# value of dump() is shown as the cell output.
model_filename = 'immigration_model.pkl'
joblib.dump(value=model, filename=model_filename)


In [None]:
#Cell 9: Load and Use Model for Predictions

In [None]:
# Restore the persisted model and predict on new samples.
# NOTE(review): X_new is not defined in the visible cells — confirm its source.
loaded_model = joblib.load(filename=model_filename)
predictions = loaded_model.predict(X=X_new)


In [None]:
#Cell 10: Data Visualization - Bar Chart

In [None]:
# Interactive bar chart: one bar per 'Country', height 'Immigration Count'.
# NOTE(review): `data` is not defined in the visible cells — confirm which
# DataFrame is meant and that it has these two columns.
fig = px.bar(
    data_frame=data,
    x='Country',
    y='Immigration Count',
    title='Immigration by Country',
)
fig.show()


In [None]:
#Cell 11: Data Visualization - Geographic Heatmap

In [None]:
# Interactive choropleth of immigration counts, matched by country name.
# NOTE(review): `data` is not defined in the visible cells — confirm which
# DataFrame is meant and that it has 'Country' and 'Immigration Count'.
choropleth_trace = go.Choropleth(
    locationmode='country names',
    locations=data['Country'],      # matched against country names
    z=data['Immigration Count'],    # value that drives the colour
    colorscale='Viridis',
    colorbar_title='Immigration Count',
)
fig = go.Figure(data=choropleth_trace)

# Render on an orthographic projection with coastlines drawn.
fig.update_geos(projection_type="orthographic")
fig.update_layout(geo={'showcoastlines': True})

fig.show()
