## Build a Web App to use a ML Model


In [13]:
import pandas as pd
import numpy as np

ufos = pd.read_csv('./data/ufos.csv')
ufos.head()

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


In [14]:
# Convert the ufos data to a small dataframe with fresh titles. Check the unique values in the Country field.
ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})

ufos.Country.unique()

array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)

In [15]:
#reduce the amount of data we need to deal with by dropping any null values and only importing sightings between 1-60 seconds
ufos.dropna(inplace=True)
ufos = ufos[(ufos['Seconds'] >= 1 & (ufos['Seconds'] <=60))]

ufos.info()

<class 'pandas.core.frame.DataFrame'>
Index: 70583 entries, 0 to 80331
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Seconds    70583 non-null  float64
 1   Country    70583 non-null  object 
 2   Latitude   70583 non-null  float64
 3   Longitude  70583 non-null  float64
dtypes: float64(3), object(1)
memory usage: 2.7+ MB


In [16]:
# Import Scikit-learn's LabelEncoder library to convert the text values for countries to a number
from sklearn.preprocessing import LabelEncoder
ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])

ufos.head()

Unnamed: 0,Seconds,Country,Latitude,Longitude
0,2700.0,4,29.883056,-97.941111
2,20.0,3,53.2,-2.916667
3,20.0,4,28.978333,-96.645833
4,900.0,4,21.418056,-157.803611
5,300.0,4,36.595,-82.188889


## Build your model
 train a model by dividing the data into the training and testing group.

In [17]:
from sklearn.model_selection import train_test_split

selected_features = ['Seconds', 'Latitude', 'Longitude']

X = ufos[selected_features]
y = ufos['Country']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [18]:
#Train model
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)


print(classification_report(y_test, predictions))
print('Predicted labels: ', predictions)
print('Accuracy: ', accuracy_score(y_test, predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        98
           1       0.00      0.00      0.00       587
           2       0.00      0.00      0.00        16
           3       0.00      0.00      0.00       359
           4       0.93      1.00      0.96     13057

    accuracy                           0.92     14117
   macro avg       0.19      0.20      0.19     14117
weighted avg       0.86      0.92      0.89     14117

Predicted labels:  [4 4 4 ... 4 4 4]
Accuracy:  0.9249132251894878


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
