In [None]:
from google.colab import drive

# Make Google Drive available to the notebook under this directory
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


# Import Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder  # Import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Part 1 - Exploratory Analysis

In [None]:
# Location of the source CSV under the mounted Google Drive directory
csv_path = '/content/drive/MyDrive/Colab Notebooks/Projects/Mineral Resource Management/Mines_and_Mineral_Resources.csv'
data = pd.read_csv(csv_path)

In [None]:
# Preview the top rows of the loaded table
first_rows = data.head()
print(first_rows)

In [None]:
# Descriptive statistics as computed by DataFrame.describe()
summary_stats = data.describe()
print(summary_stats)

In [None]:
# Show the dtype pandas assigned to each column
column_dtypes = data.dtypes
print(column_dtypes)

In [None]:
# Count the missing entries in every column
missing_per_column = data.isna().sum()
print(missing_per_column)

In [None]:
# Distribution of the AREA_ column, bucketed into 20 bins
fig, ax = plt.subplots()
ax.hist(data['AREA_'], bins=20, color='skyblue', edgecolor='black')
ax.set_title('Distribution of Mine Area')
ax.set_xlabel('Area')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# How often each FEATTYPE value occurs, drawn as a bar chart
fig, ax = plt.subplots(figsize=(10, 6))
feature_type_counts = data['FEATTYPE'].value_counts()
feature_type_counts.plot(kind='bar', color='lightcoral', ax=ax)
ax.set_title('Frequency of Feature Types')
ax.set_xlabel('Feature Type')
ax.set_ylabel('Frequency')
plt.show()

# Part 2 - Data Preprocessing

In [None]:
# Column holding the class label the model should predict
target_variable = 'MINE_TYPE'

# Predictor columns handed to the classifier, in model input order
selected_features = [
    'FEATTYPE',
    'SECCLASS',
    'STATE',
    'ZIP',
    'COUNTY',
    'NAICSDESCR',
    'X',
    'Y',
]

In [None]:
# Work on an independent copy limited to the predictors plus the target,
# so later column assignments never touch the originally loaded frame.
data_subset = data.loc[:, [*selected_features, target_variable]].copy()

In [None]:
# Cast the target to strings so every label has one uniform type before it is
# label-encoded. The column is addressed through `target_variable` instead of
# a repeated 'MINE_TYPE' literal, so changing the target needs a single edit.
# NOTE(review): astype(str) presumably turns missing targets into the literal
# label 'nan', which then becomes its own class — confirm that is intended.
data_subset[target_variable] = data_subset[target_variable].astype(str)

## Encode Categorical Variables

In [None]:
# Integer-encode every non-numeric feature column in place, keeping the fitted
# encoder for each column in `label_encoders` (keyed by column name) so the
# mapping can be inspected or reversed later.
label_encoders = {}
for feature in selected_features:
    column = data_subset[feature]
    # Test for "not numeric" rather than `dtype == 'object'`, so pandas
    # string/category columns are encoded too instead of reaching the model raw.
    if pd.api.types.is_numeric_dtype(column):
        continue
    encoder = LabelEncoder()
    # Cast to str first: LabelEncoder rejects a column that mixes strings with
    # float NaN or numbers. After the cast, each distinct printed value
    # (including missing ones) gets its own integer code.
    data_subset[feature] = encoder.fit_transform(column.astype(str))
    label_encoders[feature] = encoder

In [None]:
# Separate the predictors (every column except the target) from the target column
is_feature_column = data_subset.columns != target_variable
X = data_subset.loc[:, is_feature_column]
y = data_subset[target_variable]

In [None]:
# Learn the target's class vocabulary, then map each label to its integer id.
# The fitted encoder is kept so predictions can be decoded back to labels.
label_encoder_y = LabelEncoder().fit(y)
y_encoded = label_encoder_y.transform(y)

## Split the Data

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
split_parts = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Part 3 - Train the Model

In [None]:
# Fit an XGBoost classifier with library-default hyperparameters on the training split
model = XGBClassifier().fit(X_train, y_train)
print("Model trained.")

Model trained.


# Part 4 - Make Predictions

In [None]:
# Predict a class for each row of the held-out test features.
# The model was fitted on the integer-encoded targets (y_train), so these
# predictions are encoded class ids rather than the original label strings.
y_pred = model.predict(X_test)

In [None]:
# Map the integer predictions back to the original label strings, using the
# same encoder that produced y_encoded from the target column.
y_pred_decoded = label_encoder_y.inverse_transform(y_pred)

# Part 5 - Evaluate Model Performance

In [None]:
# Overall accuracy of the encoded predictions against the encoded test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9566666666666667


In [None]:
# Per-class precision / recall / F1 on the test split.
#
# `labels` lists every encoded class id explicitly so that `target_names`
# always lines up with it. Without it, scikit-learn derives the label set from
# the values present in y_test / y_pred and raises a ValueError as soon as a
# rare class is missing from both, because the name count no longer matches.
#
# `zero_division=0` keeps the reported score at 0 for classes with no test
# samples or no predictions, without emitting one warning per undefined metric.
class_ids = np.arange(len(label_encoder_y.classes_))
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_ids,
        target_names=label_encoder_y.classes_,
        zero_division=0,
    )
)

              precision    recall  f1-score   support

                   0.89      0.85      0.87        20
           1       0.38      0.33      0.35         9
          10       0.00      0.00      0.00         1
          11       1.00      0.97      0.99       155
          12       0.98      1.00      0.99       184
           2       0.75      1.00      0.86         3
           3       0.00      0.00      0.00         0
           4       0.75      0.75      0.75        24
           5       0.96      0.97      0.97        79
           6       0.97      0.97      0.97       125

    accuracy                           0.96       600
   macro avg       0.67      0.69      0.67       600
weighted avg       0.96      0.96      0.96       600



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Report the fitted model's importance score for each input column
feature_importance = model.feature_importances_
print("Feature Importance:")
for feature, score in zip(X.columns, feature_importance):
    print(feature, ":", score)

Feature Importance:
FEATTYPE : 0.0
SECCLASS : 0.0
STATE : 0.06455076
ZIP : 0.041372996
COUNTY : 0.03131092
NAICSDESCR : 0.8015152
X : 0.038793158
Y : 0.022457026
