<a href="https://www.kaggle.com/code/dascient/uacp-defining-powellscore-veracity-variables?scriptVersionId=144049125" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# 2.0 UACP - Defining PowellScore & Veracity Variables
## [1.0 UAP Analytic Centralization Program](https://www.kaggle.com/code/dascient/uacp-uap-analytic-centralization-program)
<br>

## [NLP - Sentiment Intensity Analyzer](https://github.com/cjhutto/vaderSentiment) Against Reporting Comments
<br>

### In collaboration with The Scientific Coalition for UAP Studies [(SCU)](https://www.explorescu.org).
Here we isolate only the pertinent variables from the original dataset. We have also left most of the code cells below visible, so that the foundation of every variable stays transparent.

In [None]:
%%time
# Environment setup, data loading and feature preparation for the NUFORC reports.
# xlrd is installed first; it is used by pd.read_excel for the legacy .xls lexicon below.
!pip install xlrd
import warnings
warnings.filterwarnings("ignore")  # NOTE(review): silences every warning, including pandas deprecation notices
from IPython.display import clear_output
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from matplotlib import pyplot as plt
from shapely.geometry import Point
import geopandas as gpd
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from geopandas import GeoDataFrame
import matplotlib.colors as colors
import seaborn as sns
import random as r

# list every file available under the Kaggle input directory
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        #print('Files loaded.')
        
# never truncate long text columns (the comments) when a frame is displayed
pd.set_option('display.max_colwidth', None)

# load both NUFORC exports and keep only rows without any missing value
og_df1 = pd.read_csv('/kaggle/input/ufo-sightings/ufos.csv',header=0)
df = og_df1.dropna().copy()
# NOTE(review): df2 is not read again in this notebook before it is reassigned in the California-ovals cell
og_df2 = pd.read_csv('/kaggle/input/d/NUFORC/ufo-sightings/scrubbed.csv',header=0)
df2 = og_df2.dropna().copy()

#############################################

# SCU lexicon: the header row sits on spreadsheet row 8 (header=7 is zero-based)
lex = pd.read_excel('/kaggle/input/scu-nlp-uap-lexicon/UFO lexicon rev2.xls',sheet_name='Sheet1',header=7)
# drop fully empty rows and the unnamed leading column
lex = lex.dropna(how='all').drop(columns='Unnamed: 0').copy()

#############################################
# sanitize
# drop some columns, for now
df = df.drop(columns=['datetime','duration (hours/min)'])

# 'date posted' converts cleanly to timestamps, so it is the time axis used from here on
df['date posted'] = df['date posted'].astype('datetime64[ns]')


# length of each comment in characters, capped at 500
df['comment_length'] = [len(str(v[0:500])) for i,v in df.comments.items()]


# convert seconds to minutes
# NOTE(review): int(v) raises on values that are not integer-like (e.g. the string "0.5") - confirm the column is clean
df["duration (minutes)"] = [int(v)/60 for i,v in df["duration (seconds)"].items()]


# "latitude, longitude" string column for special use below
df['Geo Point'] = df.apply(lambda x:'%s, %s' % (x['latitude'],x['longitude']),axis=1)


# let's create subsets of our 80,000 here: 
# we can implement conditionals, remove/analyze outliers, 
# & will enable for back referencing when starting to run 
# robust AI-ML modeling that would otherwise take much longer to run.

# subsets of the main dataframe by observation duration
# (taken here, so they do not contain the columns added further down in this cell)
df_under100 = df[df["duration (minutes)"]<100]
df_under60 = df[df["duration (minutes)"]<60]

# random placeholder label for future AI-ML modeling: five weighted classes drawn at random,
# not a real verification result. The spelling 'undentified' is data and is left untouched.
a=['balloon','spacejunk','sensor_malfunction','undentified','anomalous']     
df['verified'] = pd.Series(r.choices(a,k=len(df),weights=(50, 40, 30, 20, 10)),index=df.index)

# shape-focused subsets
circles = df[df['shape'] == 'circle']
spheres = df[df['shape'] == 'sphere']
lights = df[df['shape'] == 'light']
teardrops = df[df['shape'] == 'teardrop']

# year-month period of the posting date
df['year_month'] = df['date posted'].dt.to_period('M')

# wipe the pip / file-listing output before printing the summary
clear_output()
# show
print("\nOriginal dataset.")
print(f"\nReports: {len(df)} non-null dataframe.")
print("\nMatrix:",df.shape[0],"rows,",df.shape[1],"columns")
# chronological order with a fresh 0..n-1 index; the 11 most recently posted rows are displayed
df = df.sort_values('date posted',ascending=True).reset_index(drop=True)
df.tail(11).reset_index(drop=True).style.background_gradient(cmap ='seismic').set_properties(**{'font-size': '11px'}).set_properties(**{'text-align': 'left'})

# Lexicon

In [None]:
# Keep only the lexicon entries that carry a non-zero rating.
# (Filters tried earlier: 'Previous Rating' != 0, and ratings greater than or equal to 3.)
has_rating = lex['RATING'] != 0
lex_nonzero = lex[has_rating]

# show the retained entries
lex_nonzero

## Hash through each comment to find only those that include non-zero lexicon words. 

In [None]:
%%time
import time
from nltk.tokenize import word_tokenize

df_sample = df.sample(1000)

# hash through each comment to find only those that include non-zero lexicon words
lexicon_favored = df_sample.copy()
lexicon_favored['rating'] = pd.Series()
lexicon_favored['lexicon_word'] = pd.Series()
lexicon_favored['word_count'] = pd.Series()

for i,word in lex_nonzero.WORD.items():
    for i2,piece in df_sample.comments.items():     
        if word in word_tokenize(piece.lower()):
            #print('index',i2,'\nword',word, '\npiece',piece.lower(), '\nrating', lex_nonzero.RATING[i],'\n')

            # add rating from lexicon
            lexicon_favored['rating'][i2] = lex_nonzero.RATING[i]

            # add up every word usage in comments
            lexicon_favored['lexicon_word'][i2] = lex_nonzero.WORD[i]
            
            # word count
            lexicon_favored['word_count'][i2] = len(word_tokenize(piece.lower()))
        else:
            # word count
            lexicon_favored['word_count'][i2] = len(word_tokenize(piece.lower()))
            
lexicon_favored = lexicon_favored
#clear_output()

### Lexicon-Focused Dataset
<br>

#### Snippet

In [None]:
lexicon_favored.head()

In [None]:
any(lexicon_favored.index.duplicated())

In [None]:
import matplotlib.pyplot as plt

# Histogram of tokens per comment; with bins='auto' the binning is delegated to np.histogram.
counts_per_comment = lexicon_favored.word_count
_ = plt.hist(counts_per_comment, bins='auto')
plt.title("Word Counts Histogram with 'auto' bins")
plt.show()

In [None]:
# stopwords
import nltk
from nltk.corpus import stopwords

#nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# NOTE(review): 'just_words' is created as an all-NaN column and nothing in
# this cell fills it. The explicit dtype avoids the empty-Series dtype warning.
lexicon_favored['just_words'] = pd.Series(dtype=object)

# (row label, token) pairs for every token that is not an English stop word.
# A tuple keeps the order; the previous set literal {i, word} was unordered,
# so label and token could not be told apart afterwards.
filtered_sentence = []
for i, piece in lexicon_favored.comments.items():
    for word in word_tokenize(piece.lower()):
        if word not in stop_words:
            filtered_sentence.append((i, word))

### Ranked by Lexicon Rating.

In [None]:
# Highest lexicon rating first, then renumber the rows 0..n-1.
ranked_by_rating = lexicon_favored.sort_values('rating', ascending=False)
lexicon_favored = ranked_by_rating.reset_index(drop=True)

# show the 25 top-rated reports
top_rated = lexicon_favored.head(25)
top_rated.style.background_gradient(cmap ='seismic').set_properties(**{'font-size': '11px'})

# "Be careful not to choke on your aspirations." - Darth Vader
### Application of [VADER](https://github.com/cjhutto/vaderSentiment) (Valence Aware Dictionary and sEntiment Reasoner)
A lexicon and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media.

In [None]:
# https://www.geeksforgeeks.org/python-sentiment-analysis-using-vader/
# https://github.com/cjhutto/vaderSentiment
# import SentimentIntensityAnalyzer class
# from vaderSentiment.vaderSentiment module.

from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Shared VADER analyzer: created on first use and reused afterwards, because
# constructing SentimentIntensityAnalyzer for every sentence is needlessly slow.
_VADER_ANALYZER = None


def sentiment_scores(sentence):
    """Return the VADER polarity shares of *sentence* as a one-row DataFrame.

    Parameters
    ----------
    sentence : str
        Text passed to ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with the columns "% Positive", "% Negative"
        and "% Neutral", holding the analyzer's 'pos', 'neg' and 'neu' values.
    """
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()

    sentiment_dict = _VADER_ANALYZER.polarity_scores(sentence)

    # one-row frame; the compound score is not carried over
    return pd.DataFrame([{"% Positive": sentiment_dict['pos'],
                          "% Negative": sentiment_dict['neg'],
                          "% Neutral": sentiment_dict['neu']}])

# Apply to df['comments'] column.
def NLP_PowellScore(commentsColumns):
    """Score every comment with VADER and return the three polarity lists.

    Each comment is analysed once; the previous version ran the analyzer
    three times per comment (once for each returned list).

    Parameters
    ----------
    commentsColumns : pandas.Series
        Comments to score; anything exposing ``.items()`` with the text as
        the value works.

    Returns
    -------
    tuple of (list, list, list)
        ``(PowellPositive, PowellNegative, PowellNeutral)`` in the order of
        the input, taken from the "% Positive", "% Negative" and "% Neutral"
        columns produced by ``sentiment_scores``.
    """
    PowellPositive, PowellNegative, PowellNeutral = [], [], []

    for _, comment in commentsColumns.items():
        scores = sentiment_scores(comment)
        PowellPositive.append(scores["% Positive"][0])
        PowellNegative.append(scores["% Negative"][0])
        PowellNeutral.append(scores["% Neutral"][0])

    return PowellPositive,PowellNegative,PowellNeutral

### Reports sorted by Veracity
We also added the "Rating" score from Lexicon.

In [None]:
# Work on an independent copy of the lexicon-rated sample (sub-sampling is currently switched off).
# Scoring it in the next cell takes a few minutes - grab yourself some water...
robert = lexicon_favored.copy()#.sample(30000)

# how many reports fall under each lexicon rating
robert['rating'].value_counts()

In [None]:
%%time
# defining Powell Scores by sentiment outputs: Positive, Negative, Neutral, & Rating
robert["PowellPositive"] = NLP_PowellScore(robert['comments'])[0]
robert["PowellNegative"] = NLP_PowellScore(robert['comments'])[1]
robert["PowellNeutral"] = NLP_PowellScore(robert['comments'])[2]

# PowellScore 
robert["PowellScore"] = (robert["PowellPositive"]-robert["PowellNegative"])/robert["PowellNeutral"]

# veracity
robert["veracity"] = robert["PowellScore"]*robert["comment_length"]*robert["rating"] # FINALLY, THIS EQUATION ACCOUNTS FOR POWELL'S LEXICON RATINGS!

# veracity is still very much in progress. we are looking for ways forward to 
# better define them. although, it is important to note that "veracity" will 
# be variable that is subjective to the type of datasets.
columns = ['date posted','city','state','shape','comments',
           'comment_length','latitude','longitude','duration (minutes)',\
           'PowellScore','veracity','rating','lexicon_word']

df1 = robert[columns].sort_values('veracity',ascending=False).reset_index(drop=True)
df1[df1['comment_length']>10].head(20)\
        .style.background_gradient(cmap ='seismic').set_properties(**{'font-size': '11px'})

## Ovals seen in California

In [None]:
%%time
# ca_oval
ca_oval = df1[df1.state=='ca'].reset_index(drop=True)
ca_oval = ca_oval[ca_oval['shape']=='oval']

# only ovals
ca_oval_162 = ca_oval.sort_values(['latitude','longitude'])
robert_ca_oval_162 = ca_oval_162
robert_ca_oval_162["PowellPositive"] = NLP_PowellScore(robert_ca_oval_162['comments'])[0]
robert_ca_oval_162["PowellNegative"] = NLP_PowellScore(robert_ca_oval_162['comments'])[1]
robert_ca_oval_162["PowellNeutral"] = NLP_PowellScore(robert_ca_oval_162['comments'])[2]

# PowellScore 
robert_ca_oval_162["PowellScore"] = (robert_ca_oval_162["PowellPositive"]-robert_ca_oval_162["PowellNegative"])/robert_ca_oval_162["PowellNeutral"]

# veracity
robert_ca_oval_162["veracity"] = robert_ca_oval_162["PowellScore"]*robert_ca_oval_162["comment_length"]*robert_ca_oval_162["rating"]


df2 = robert_ca_oval_162[columns].sort_values(['veracity'],ascending=False).reset_index(drop=True)
df2.head(50).style.background_gradient(cmap ='seismic').set_properties(**{'font-size': '11px'})

## Powell Variables in 3D
This is a 3D-interactive chart that uses the date posted, veracity, & PowellScore variables. Colored by lexicon rating. Sized by comment_length.

By definition, these actually render 5-dimensional charts, if one considers the veracity & comment length of each report as 'features of a situation'.

In [None]:
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

# only reports whose lexicon rating is above zero
df_plot = df1[df1.rating>0]

# 3D scatter: date posted / veracity / PowellScore, coloured by rating, sized by comment length
scatter_options = dict(
    x='date posted', y='veracity', z='PowellScore',
    color='rating',
    size='comment_length',
    hover_name='city',
    hover_data=['city','state','comments','rating','lexicon_word'],
    opacity=0.5,
    size_max=17,
)
fig = px.scatter_3d(df_plot, **scatter_options)
fig.show()

### This one shows Date Posted vs PowellScore & Lexicon Rating for all sampled reports. Colored by Veracity.

In [None]:
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

# 3D scatter of every report in df1: date posted / PowellScore / rating,
# coloured by veracity and sized by comment length
hover_columns = ['city','state','comments','rating','lexicon_word']
fig = px.scatter_3d(
    df1,
    x='date posted', y='PowellScore', z='rating',
    color='veracity',
    size='comment_length',
    hover_name='city',
    hover_data=hover_columns,
    opacity=0.5,
    size_max=17,
)
fig.show()

### Ovals seen in California - Date Posted vs PowellNeutral vs Lexicon Rating

In [None]:
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

# 3D scatter of the California ovals: date posted / PowellNeutral / rating,
# coloured by veracity and sized by comment length
hover_columns = ['city','state','comments','shape','veracity','rating']
fig = px.scatter_3d(
    robert_ca_oval_162,
    x='date posted', y='PowellNeutral', z='rating',
    color='veracity',
    size='comment_length',
    hover_name='city',
    hover_data=hover_columns,
    opacity=0.5,
    size_max=17,
)
fig.show()

These variables are still very much in progress & there currently is no process for defining them. Despite the disparate, disconnected, & wide range of skeptic/non-skeptic relational databases — we have managed to connect with organizations that promote open source — public repositories & most are willing to coordinate with one another in developing a UAP Reporting & Events Hub. Wherein all pertinent reports, sightings, measurements, & signatures are to be populated by various factors from multiple disciplines & technologies. We will do our best to coordinate with prominent key members of the UAP community in order to contribute to building out a “standardized” reporting mechanism in an intelligible & non-duplicative fashion. We are looking for ways forward in getting access to real-time, current reports.

The goal would be to create something similar to an Order of Battle, so that reports at specific times & locations can be compared to past reports to augment credibility determination, as well as eventually be compared to known events that may explain them. Once those explanations are vetted, reports would be coded by likelihood of mundane vs anomalous, which would aid in the processing of similar events in the future.

In addition, we have already begun looking for trends over time, such as the time of day when reported events take place, & the type of object reported over the decades. The latter can be observed in the “Shapes by Share of Reports” chart, which provides indications of confirmation bias in observed behavior.

Finally, big data analysis (alongside robust AI|ML|DS modeling techniques) could also provide insight into the development of improved collection & reporting processes, which currently appear to be undefined, improving the quality of the data we receive. — K. Kolbe.

# Different NLP Methods

# DaS-VADER Sentiment Analyzer

Here, we begin focusing on the lexicon analysis of each comment submission.

In [None]:
df1[['comments']].sample(15).style.set_properties(**{'text-align': 'left'})

In [None]:
# Split every comment on single spaces and collect all pieces in one frame called "comments"
# (one row per word, column 'words').
a = [text.split(' ') for text in df1.comments]
flatlist = [element for sublist in a for element in sublist]
comments = pd.DataFrame(flatlist, columns=['words'])
comments

In [None]:
pd.set_option('display.max_rows', None)

comments.words.value_counts().head(20)

In [None]:
comments.words.value_counts().head(50).plot(kind='barh',figsize=(15,7))

In [None]:
# billy-boy!
# isolate only "adjectives, nouns, verbs, & adverbs"

# Visual Insights

## Word Clouds
### All sampled reports.

In [None]:
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# in the clouds
# every report in df1
# The stop-word set gets its own name: rebinding `stopwords` here would
# overwrite the nltk `stopwords` module imported by an earlier cell and break
# that cell on re-run.
cloud_stopwords = set(STOPWORDS)

# One long string for the cloud: each comment is cast to str, lower-cased,
# whitespace-normalised and followed by a single space. Built with join
# rather than repeated `+=`.
comment_words = "".join(
    " ".join(str(val).lower().split()) + " " for val in df1.comments
)

wordcloud = WordCloud(width = 1200, height = 600,
                background_color ='white',
                stopwords = cloud_stopwords,
                min_font_size = 7,
                colormap='twilight').generate(comment_words)

# plot the WordCloud image
plt.figure(figsize=(25,10), facecolor='None')
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)

plt.show()

### California Ovals 

In [None]:
# in the clouds
# california ovals
# The stop-word set gets its own name: rebinding `stopwords` here would
# overwrite the nltk `stopwords` module imported by an earlier cell and break
# that cell on re-run.
cloud_stopwords = set(STOPWORDS)

# One long string for the cloud: each comment is cast to str, lower-cased,
# whitespace-normalised and followed by a single space. Built with join
# rather than repeated `+=`.
comment_words = "".join(
    " ".join(str(val).lower().split()) + " " for val in df2.comments
)

wordcloud = WordCloud(width = 1200, height = 600,
                background_color ='white',
                stopwords = cloud_stopwords,
                min_font_size = 7,
                colormap='twilight').generate(comment_words)

# plot the WordCloud image
plt.figure(figsize=(25,10), facecolor='None')
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)

plt.show()

### Top Veracity Comments

In [None]:
df1.reset_index(drop=True).set_index('veracity').head(20)[['comments','rating','lexicon_word','shape','city']]

### California Ovals Comments

In [None]:
df2.set_index('veracity')[['comments','shape','city']]

# MUFON Data Exploitation

In [None]:
%%time
import pandas as pd
!pip install xlrd
# Pull in data
mufon = pd.read_excel('/kaggle/input/scu-nlp-uap-lexicon/Powell with Comments.xls',sheet_name='mufon_cms_2017-04-09',header=0)
witnesses = pd.read_excel('/kaggle/input/scu-nlp-uap-lexicon/Powell with Comments.xls',sheet_name='Sheet1',header=0)
mufon = mufon.dropna(how='all').copy()
mufon.head()

In [None]:
%%time
import time
from nltk.tokenize import word_tokenize

df_sample1 = mufon.sample(1000)

# hash through each comment to find only those that include non-zero lexicon words
lexicon_favored1 = df_sample1.copy()
lexicon_favored1['rating'] = pd.Series()
lexicon_favored1['lexicon_word'] = pd.Series()
lexicon_favored1['word_count'] = pd.Series()

for i,word in lex_nonzero.WORD.items():
    for i2,piece in df_sample1['Detailed Description'].items():     
        if word in word_tokenize(piece.lower()):
            #print('index',i2,'\nword',word, '\npiece',piece.lower(), '\nrating', lex_nonzero.RATING[i],'\n')

            # add rating from lexicon
            lexicon_favored1['rating'][i2] = lex_nonzero.RATING[i]

            # add up every word usage in comments
            lexicon_favored1['lexicon_word'][i2] = lex_nonzero.WORD[i]
            
            # word count
            lexicon_favored1['word_count'][i2] = len(word_tokenize(piece.lower()))
        else:
            # word count
            lexicon_favored1['word_count'][i2] = len(word_tokenize(piece.lower()))
#clear_output()

In [None]:
lexicon_favored1.head()

In [None]:
%%time
#mufon = mufon.sample(30)
# defining Powell Scores by sentiment outputs: Positive, Negative, & Neutral
lexicon_favored1["PowellPositive"] = NLP_PowellScore(lexicon_favored1['Detailed Description'])[0]
lexicon_favored1["PowellNegative"] = NLP_PowellScore(lexicon_favored1['Detailed Description'])[1]
lexicon_favored1["PowellNeutral"] = NLP_PowellScore(lexicon_favored1['Detailed Description'])[2]

# PowellScore 
lexicon_favored1["PowellScore"] = (lexicon_favored1["PowellPositive"]-lexicon_favored1["PowellNegative"])/lexicon_favored1["PowellNeutral"]

# veracity
lexicon_favored1["veracity"] = lexicon_favored1["PowellScore"]*lexicon_favored1["Length"]*lexicon_favored1["Score"]

df3 = lexicon_favored1.sort_values('veracity',ascending=False).reset_index(drop=True)
df3.head(100)\
        .style.background_gradient(cmap ='seismic').set_properties(**{'font-size': '11px'})

In [None]:
df3.to_csv('mufon_lexicon.csv',index=False)

## Data Science

In [None]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix

# encoding
from sklearn.preprocessing import LabelEncoder

def encode(df):
    """Return a copy of *df* in which every column is label-encoded.

    Each column is passed through ``LabelEncoder.fit_transform`` on its own;
    the input frame is left untouched.
    """
    encoder = LabelEncoder()
    column_names = df.columns.values.tolist()
    df_encoded = df[column_names].copy()

    # the encoder is refitted for every column, one after the other
    for column_name in column_names:
        encoded_values = encoder.fit_transform(df[column_name])
        df_encoded[column_name] = encoded_values

    return df_encoded


# encoded variable re-mapping
def encoding_remap(df, df_encoded, target):
    """Build the lookup table from encoded *target* values back to the original labels.

    The mapping is built from every row the two frames share (matched on
    index). The earlier version looked only at the rows of one train/test
    split partition, so a label that occurred solely in the other partition
    was missing from the table and could not be decoded.

    Parameters
    ----------
    df : pandas.DataFrame
        Original (un-encoded) data.
    df_encoded : pandas.DataFrame
        Encoded counterpart of *df*, sharing its index.
    target : str
        Name of the column to map.

    Returns
    -------
    pandas.DataFrame
        One column named *target* holding the original value as ``str``;
        the index (named 'index') is the integer encoded value, sorted
        ascending, with one row per distinct label.
    """
    shared_rows = df.index.intersection(df_encoded.index)

    original_labels = [str(value) for value in df.loc[shared_rows, target]]
    encoded_values = df_encoded.loc[shared_rows, target].astype(int).to_numpy()

    remap = pd.DataFrame(
        {target: original_labels},
        index=pd.Index(encoded_values, name='index'),
    )

    # one row per label, ordered by encoded value
    return remap.drop_duplicates().sort_index()


# pairplot
import seaborn as sns
def pairplot(df, target):
    """Draw a seaborn pairplot of a small random sample of *df*, coloured by *target*.

    The sample is one row per 10,000 rows of *df*, but never fewer than one
    row and never more than the frame holds. The earlier expression
    ``len(df/10000)`` divided the frame itself rather than its length, which
    raises on text columns and otherwise samples every row.
    NOTE(review): the 1/10,000 ratio is read off the original expression -
    confirm it is the intended sampling rate.

    Returns the ``PairGrid`` produced by ``sns.pairplot``.
    """
    sample_size = min(len(df), max(1, len(df) // 10000))
    return sns.pairplot(df.sample(sample_size), hue=target)
    
    
# create X,y variables for ML
from sklearn.model_selection import train_test_split
def X_y_sets(df, target):
    """Split *df* into features/target and a reproducible train/test partition.

    Rows containing any missing value are dropped first.

    Returns
    -------
    tuple
        ``(split, X, y)`` where ``split`` is the list
        ``[X_train, X_test, y_train, y_test]`` from ``train_test_split``
        (33 % test share, ``random_state=42``), ``X`` is the feature frame
        without the *target* column and ``y`` is the target as a 1-D array.
    """
    complete_rows = df.dropna()  # computed once and reused for X and y
    X = complete_rows.drop(columns=[target]).copy()
    # to_numpy() replaces the deprecated Series.ravel(); a Series is already 1-D
    y = complete_rows[target].to_numpy().copy()

    return train_test_split(X, y, test_size=0.33, random_state=42), X, y

In [None]:
# is scaling necessary?
# construction of ML dataframes
# column to predict (other candidates considered: 'veracity', 'PowellScore', 'NLP_PowellScore')
target = 'Disposition'

# independent copy holding only the modelling columns
model_columns = ['Witnesses','Score','Length','Nearest City','State',
                 'Object Shape Primary','Exact Latitude','Exact Longitude',
                 'Disposition','veracity','PowellScore']
a = lexicon_favored1[model_columns].copy()

# for the sake of computational efficiency: at most the first 10,000 rows, renumbered from 0
a = a.head(10000)
a = a.reset_index(drop=True)

In [None]:
a

In [None]:
# pick one random row & remember its index; it becomes the encoded use-case later on
from random import randrange
idx = randrange(len(a))

# print random configuration item
print("\nThis is a randomly chosen subject we will try to predict.")
b = pd.DataFrame(a.loc[idx]).T

# the value we will try to predict for that row
target_value = b.reset_index()[target][0]
print(f"\nTarget:'{target}' value is: ",target_value,"\n")

# store sol'n (as text, for the comparison at the end)
solution = str(target_value)

# print data point
# if this cell fails, try it again from step 1 - you ran into a null variable (i'll fix that soon enough)
b

In [None]:
# label-encode every column of dataframe(a)
c = encode(a)
print("\nOriginal dataframe encoded into something we can run a classifier against.\n")

# preview ten random encoded rows
encoded_preview = c.sample(10).reset_index(drop=True)
encoded_preview.style.background_gradient(cmap ='Pastel1').set_properties(**{'font-size': '10px'})

In [None]:
# KDE pairplot (lower triangle only) of 100 random encoded rows, coloured by the target
pairplot_columns = ['Witnesses','Score','Length','Nearest City','State',
                    'Object Shape Primary','Exact Latitude','Exact Longitude',
                    'Disposition','veracity','PowellScore']
pairplot_sample = c[pairplot_columns].sample(100).copy()
sns.pairplot(pairplot_sample,
             hue=f'{target}',
             kind="kde",
             corner=True,
             palette="Paired"
            )

In [None]:
# encoded version of the randomly chosen row, with the target column removed
encoded_row = pd.DataFrame(c.loc[idx]).T
use_case = encoded_row.drop(columns=[target])

# the whole encoded frame w/out target info
data = c.drop(columns=[target])

print("\nThis is what our encoded 'use-case' looks like - number form, just the way the machine likes it.\n")

use_case.style.background_gradient(cmap ='twilight').set_properties(**{'font-size': '10px'})

In [None]:
# create X,y variables for ML
# save trainer
print("\nResetting train data...\nCreating X-matrix & y-vector (target) for classification.")

# every encoded row except the use-case we want to predict
trainer = c.loc[c.index!=idx].copy()

# feature matrix and target vector; to_numpy() replaces the deprecated Series.ravel()
X, y =  trainer.drop(columns=[target]), trainer[target].to_numpy()

# reproducible train/test partition (first element returned by X_y_sets)
X_train, X_test, y_train, y_test = X_y_sets(trainer, target)[0]

In [None]:
# Attach the training targets for display purposes only.
# y_train is a plain array in the same row order as X_train, so it is assigned
# positionally. Wrapping it in pd.Series() gave it a fresh 0..n-1 index, which
# pandas then aligned by label against X_train's shuffled index - pairing rows
# with the wrong targets and leaving NaN elsewhere.
X_train['target'] = y_train
X_train.dropna().head().reset_index(drop=True).style.background_gradient(cmap ='twilight').set_properties(**{'font-size': '10px'})

In [None]:
# The previous cell added a 'target' column to X_train purely for the layman's explanation;
# re-create the split here so the training features no longer contain it.
X_train, X_test, y_train, y_test = X_y_sets(trainer, target)[0]

In [None]:
# encoded variable re-mapping
# specific to our current target choice
# d is indexed by the encoded target value and holds the matching original label as text
d = encoding_remap(a, c, target)
print("\nDecoding our encoded dataframe to correlate with the initial randomly chosen subject.\n")

In [None]:
print("\n-Live prediction-\nThinking...\n")

# MLP: build the classifier, then train it on the training partition
clf = MLPClassifier(alpha=0.666, max_iter=666)
clf = clf.fit(X_train, y_train)

# score on the held-out partition, shown as a percentage
test_score_percent = clf.score(X_test, y_test)*100
print()
print("Test score (confidence): ",test_score_percent,"%")
print()

# predicted (encoded) target for the single use-case row
prediction = clf.predict(use_case)[0]
print(f"Prediction {target} index:",prediction)

# print decoded prediction: the remap row(s) whose index equals the predicted code
print("\nPrediction Decoded")
matches_prediction = d.index == prediction
e = d[matches_prediction]
e

In [None]:
# Compare the decoded prediction with the stored solution.
# The label is read through `target` (it used to be hard-coded to 'Disposition',
# which breaks as soon as another target is chosen). An empty lookup - the
# predicted code has no decoded label - is reported instead of raising IndexError.
if e.empty:
    solved = None
    print("\nThe predicted index could not be decoded into a label.")
else:
    solved = str(e[target].iloc[0])

if solution == solved:
    print(f"\nYUP!\n\nThe machine's prediction against target variable '{target}' was correct!\n")
else:
    print("\nNOPE!\nThe machine's prediction was incorrect :(")

print()

In [None]:
# en fin