# Machine learning to predict new crescent moon visibility

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# MACHINE LEARNING IMPORTS
import sklearn
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# Load the sighting data set.
# The path is assembled from its components so it resolves on any OS; on
# Windows this still evaluates to '..\\Data\\icouk_sighting_data_with_params.csv',
# whereas the hard-coded backslash form is treated as one file name on POSIX.
import os

data_file = os.path.join('..', 'Data', 'icouk_sighting_data_with_params.csv')
data = pd.read_csv(data_file)

# Optional: uncomment to also discard these columns.
#data = data.drop('Location', axis = 1)
#data = data.drop('Method', axis = 1)

# Discard the 'Index' column, then preview the first 10 rows.
data = data.drop('Index', axis=1)
data.head(10)

In [None]:
# Visualising a couple of variables.
# By default every column is plotted; swap in an explicit list to plot a subset.
variable_list = data.columns.tolist()  #['Hijri year', 'Month', 'Seen?','Lat','Lon']
#print(variable_list)

# List of label options
ptype = [r"Seen", r"Not_seen"]

# Plot data (disabled by default; set showpairplot = True to draw it)
#diag_kind='kde'
showpairplot = False
if showpairplot:
    # Everything that touches `fig` lives inside this guard: `fig` only exists
    # when the pairplot is drawn, so running the legend code unconditionally
    # raises NameError whenever showpairplot is False.
    fig = sns.pairplot(data[variable_list], hue='Seen', palette='bright')

    # Change location of legend
    fig._legend.set_bbox_to_anchor((1.05, 0.5))

    # Append a descriptive name to each legend entry.
    # NOTE(review): entries are paired with ptype purely by position - confirm
    # the legend order matches the order of ptype.
    for t, l in zip(fig._legend.texts, ptype):
        t.set_text(str(t.get_text()) + " - " + str(l))

    plt.show()


In [None]:
# List of features without the label column.
# A new list is built instead of aliasing variable_list: calling
# features.remove('Seen') on an alias also strips 'Seen' from variable_list
# and raises ValueError the second time this cell is run.
features = [column for column in variable_list if column != 'Seen']
#print(features)

# Model inputs (feature columns) and the label to predict ('Seen').
X = data[features]
y = np.array(data['Seen'])

# 80/20 training/test split; random_state is fixed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [None]:
# Build the random forest classifier, then fit it to the training split.
# Leave random_state=1 in the arguments.
# criterion can be 'gini' or 'entropy'.
rf = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=8,
    random_state=1,
)

# fit() receives the training inputs together with their "truth" labels
rf.fit(x_train, y_train)

In [None]:
# Score the fitted model on the training split, then on the test split.
# Each accuracy is computed once and stored; the stored value is what gets
# printed, so the report and the rf_acc_* variables always agree.
y_pred = rf.predict(x_train)
rf_acc_train = metrics.accuracy_score(y_train, y_pred)
print("Accuracy on training dataset:", rf_acc_train)

# y_pred is rebound here, so after this cell it holds the test-set predictions.
y_pred = rf.predict(x_test)
rf_acc_test = metrics.accuracy_score(y_test, y_pred)
print("Accuracy on testing dataset:", rf_acc_test)

In [None]:
# Plot the confusion matrix for the test-set predictions, normalised over the
# true classes (normalize='true').
# The class order comes from the fitted model and is passed to BOTH calls, so
# each row/column is labelled with the class it actually counts. A hand-written
# label list can silently mislabel the axes, because confusion_matrix orders
# classes by sorted label value, not by the order of that list.
# NOTE(review): if friendlier tick names are wanted, map them from
# rf.classes_ rather than hard-coding an order.
class_labels = rf.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels, normalize='true')
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot()
plt.show()

In [None]:
# Horizontal bar chart of the fitted forest's feature importances:
# one bar per entry in `features`, with the feature names as y-axis ticks.
importance = rf.feature_importances_
ytix = features

plt.barh(range(len(ytix)), importance)
plt.yticks(range(len(ytix)), ytix)
plt.ylabel("Features")
plt.xlabel("Importance")
plt.show()