In [None]:
# Import required libraries
import numpy as np                            # For numerical operations and array handling
import pandas as pd                           # For data loading and manipulation
from sklearn.model_selection import train_test_split  # To split data into training and test sets
import pickle                                 # To save/load trained models (not used in this snippet yet)
from sklearn import metrics                   # For evaluating model performance
from sklearn.ensemble import RandomForestClassifier  # The classifier we'll use

# Load the dataset from a CSV file into a pandas DataFrame.
# NOTE: this is an absolute, machine-specific path; adjust it when running
# the notebook on another machine.
data = pd.read_csv("C:/Users/98938/Desktop/earthquake-prediction/data/dataset.csv")

# Convert the DataFrame to a NumPy array for easier positional slicing
data = data.to_numpy()
print(data)  # Print the dataset to check its content

# Split data into features (X) and labels (y)
X = data[:, :-1]         # All columns except the last one are features
y = data[:, -1]          # The last column is the target label

# Keep the features as floats: casting them to int would silently drop the
# fractional part of every value (e.g. 29.06 -> 29) and throw away
# information the model could otherwise learn from.
X = X.astype(float)

# The classifier needs discrete class labels, so the target is deliberately
# truncated to an integer (e.g. 2.5 -> 2), which bins it into classes.
y = y.astype(int)

# Split data into training and test sets (80% train, 20% test).
# The fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Print training data to verify the split
print(X_train, y_train)

# Initialize the Random Forest Classifier. Seeding it makes the trained model,
# and therefore the reported accuracy, reproducible between runs.
rfc = RandomForestClassifier(random_state=0)

# Train the classifier on the training data
rfc.fit(X_train, y_train)

# Predict labels for the test set
y_pred = rfc.predict(X_test)

# Calculate and print the accuracy of the model on the test set
print(metrics.accuracy_score(y_test, y_pred))


[[29.06 77.42  5.    2.5 ]
 [19.93 72.92  5.    2.4 ]
 [31.5  74.37 33.    3.4 ]
 ...
 [22.5  88.1  10.    3.6 ]
 [24.6  94.2  54.    3.5 ]
 [14.5  92.9  10.    4.6 ]]
[[ 22  94  10]
 [ 27  75   5]
 [ 34  76  10]
 ...
 [ 31  76   5]
 [ 23  70  10]
 [ 37  72 125]] [4 2 2 ... 3 4 4]
0.5735294117647058
