## Importing Packages

In [170]:
import pandas as pd
import numpy as np

import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import LeaveOneOut, GridSearchCV

## Importing Peace Countries as well as some manual calculations

In [171]:
# Read the tab-delimited file into a DataFrame
df_peace = pd.read_csv('data/peace_10K.txt', delimiter='\t')
df_peace = df_peace.drop('Unnamed: 1',axis=1)
df_peace = df_peace.set_index('Unnamed: 0')

In [172]:
df_peace.sum()

Australia         431095758
New Zealand        35652792
Sweden             39352148
Austria             3111471
Belgium            22114947
Denmark            10560771
Norway             14209454
Finland            11709112
Netherlands         7930109
Czech Republic      1522757
dtype: int64

In [173]:
df_peace.loc['the']

Australia         25966824
New Zealand        2258492
Sweden             2398086
Austria             184331
Belgium            1476553
Denmark             652475
Norway              860205
Finland             708941
Netherlands         484238
Czech Republic      113151
Name: the, dtype: int64

In [174]:
df_peace = df_peace/df_peace.sum()

In [175]:
25966824/431095758

0.0602344688346481

In [176]:
df_peace.loc['the']

Australia         0.060234
New Zealand       0.063347
Sweden            0.060939
Austria           0.059242
Belgium           0.066767
Denmark           0.061783
Norway            0.060538
Finland           0.060546
Netherlands       0.061063
Czech Republic    0.074307
Name: the, dtype: float64

In [177]:
df_peace = df_peace.mean(1)
df_peace = df_peace.transpose()
df_peace['target'] = 1

In [188]:
#df_peace.to_csv('data/peaceful_averages.csv')

## Importing Non-peace countries

In [179]:
# Read the tab-delimited file into a DataFrame
df_nonpeace = pd.read_csv('data/nopeace_10K.txt', delimiter='\t')
df_nonpeace = df_nonpeace.drop('Unnamed: 1',axis=1)
df_nonpeace = df_nonpeace.set_index('Unnamed: 0')
df_nonpeace = df_nonpeace/df_nonpeace.sum()
df_nonpeace = df_nonpeace.mean(1)
df_nonpeace = df_nonpeace.transpose()
df_nonpeace['target'] = 0

## Combining the data

In [182]:
df = pd.concat([df_peace,df_nonpeace],axis=1)
df = df.transpose()
df = df.fillna(0)

In [186]:
df

Unnamed: 0,yogurt,airplane,esthetic,aging,aluminum,amortization,amortize,analog,analyze,annex,...,wt,xad,yadav,yatra,yearold,yemeni,yorker,zimbabwean,zionist,zonal
0,2e-06,3e-06,2e-06,2e-06,1.8e-05,0.000102,5e-06,5e-06,2.1e-05,9e-06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3e-06,5e-06,3e-06,2e-06,1.2e-05,2.3e-05,5e-06,3e-06,2.1e-05,4e-06,...,6.82126e-07,3.901241e-07,4.876968e-07,3.71603e-07,3.556851e-07,1.4e-05,5.813657e-07,6.8e-05,1.1e-05,5e-06


In [189]:
df_nonpeace.to_csv('data/nonpeaceful_averages.csv')

## ML Code (in progress)

In [81]:

# Separate the features (X) and the target (y)
X = df.drop("target", axis=1)
y = df["target"]

# Initialize an empty list to store the model's predictions for each fold
predictions = []

# Define hyperparameter grid for XGBoost
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 4, 5],
    'learning_rate': [0.01, 0.1, 0.2]
}

# Initialize Leave-One-Out cross-validator
loo = LeaveOneOut()

# Initialize the XGBoost model
model = xgb.XGBClassifier()

# Initialize GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(model, param_grid, cv=loo, scoring='accuracy')

# Perform Leave-One-Out cross-validation with hyperparameter tuning
for train_index, test_index in loo.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    # Perform hyperparameter tuning
    grid_search.fit(X_train, y_train)
    
    # Get the best model from the grid search
    best_model = grid_search.best_estimator_
    
    # Make predictions on the test data for this fold
    y_pred = best_model.predict(X_test)
    
    # Append the predictions for this fold to the list
    predictions.append(y_pred[0])

# Calculate the overall accuracy of the model using LOO cross-validation
accuracy = accuracy_score(y, predictions)

# Print the overall accuracy and best hyperparameters
print("Leave-One-Out Cross-Validation Accuracy:", accuracy)
print("Best Hyperparameters:", grid_search.best_params_)


KeyboardInterrupt: 