### Cleaning Data

In [12]:
import pandas as pd
import numpy as np

ufos = pd.read_csv("./data/ufos.csv")
ufos = pd.DataFrame({"Seconds": ufos["duration (seconds)"], "Country": ufos["country"], "Latitude": ufos["latitude"], "Longitude": ufos["longitude"]})
ufos.Country.unique()
ufos.dropna(inplace = True)
ufos = ufos[(ufos["Seconds"] >= 1) & (ufos["Seconds"] <= 60)]

from sklearn.preprocessing import LabelEncoder

ufos["Country"] = LabelEncoder().fit_transform(ufos["Country"])


### Building the model

In [13]:
from sklearn.model_selection import train_test_split

selected_features = ["Seconds", "Latitude", "Longitude"]

X = ufos[selected_features]
y = ufos["Country"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)

predictions = model.predict(X_test)

print(classification_report(y_test, predictions))
print("Prediction labels: ", predictions)
print("Accuracy: ", accuracy_score(y_test, predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        41
           1       0.82      0.22      0.35       250
           2       1.00      0.88      0.93         8
           3       0.99      1.00      1.00       131
           4       0.96      1.00      0.98      4743

    accuracy                           0.96      5173
   macro avg       0.96      0.82      0.85      5173
weighted avg       0.96      0.96      0.95      5173

Prediction labels:  [4 4 4 ... 3 4 4]
Accuracy:  0.9599845350860235


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### "Pickle" the model

In [17]:
import pickle

model_filename = "ufo-model.pkl"
pickle.dump(model, open(model_filename, "wb"))

model = pickle.load(open(model_filename, "rb"))
print(model.predict([[20,40.2,-2.9]]))

[3]


