In [1]:
import tensorflow as tf
import GPUtil

# Check if TensorFlow sees the GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"TensorFlow is using the following GPU(s):")
    for gpu in gpus:
        print(gpu)
else:
    print("No GPU detected by TensorFlow.")

# Use GPUtil to get detailed GPU information
gpus = GPUtil.getGPUs()
for gpu in gpus:
    print(f"GPU ID: {gpu.id}, Name: {gpu.name}, Load: {gpu.load*100}%, Memory Free: {gpu.memoryFree}MB, Memory Used: {gpu.memoryUsed}MB, Memory Total: {gpu.memoryTotal}MB")


TensorFlow is using the following GPU(s):
PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
GPU ID: 0, Name: NVIDIA GeForce RTX 3050 Laptop GPU, Load: 0.0%, Memory Free: 3964.0MB, Memory Used: 0.0MB, Memory Total: 4096.0MB


OES project open pit mining project

In [2]:
#importing neccessary Library
import pandas as pd     
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
import xgboost as xgb


In [12]:
# Load the telemetry data (training)
telemetry_train = pd.read_csv(r'D:\DataScience\openpitmining\telemetry_for_operations_training.csv')

# Load the operational labels (training)
operations_labels_train = pd.read_csv('D:\\DataScience\\openpitmining\\operations_labels_training.csv')

# Load the telemetry data (validation/test set)
telemetry_validation = pd.read_csv('D:\\DataScience\\openpitmining\\telemetry_for_operations_validation.csv')


In [18]:
#checking the null values
print(telemetry_train['create_dt'].isnull().sum())
print(operations_labels_train['start_time'].isnull().sum())
# Impute missing dates with a default value (use with caution)
telemetry_train['create_dt'].fillna(pd.Timestamp('2023-01-01'), inplace=True)
operations_labels_train['start_time'].fillna(pd.Timestamp('2023-01-01'), inplace=True)
print(telemetry_train['create_dt'].isnull().sum())
print(operations_labels_train['start_time'].isnull().sum())

11
0
0
0


In [20]:
# Merge telemetry data with operational state labels on time and truck name
# We'll use a time-based merge
telemetry_train['create_dt'] = pd.to_datetime(telemetry_train['create_dt'], errors='coerce')
operations_labels_train['start_time'] = pd.to_datetime(operations_labels_train['start_time'], errors='coerce')


merged_data = pd.merge_asof(
    telemetry_train.sort_values('create_dt'),
    operations_labels_train.sort_values('start_time'),
    left_on='create_dt',
    right_on='start_time',
    by='mdm_object_name',
    direction='backward'
)

# Drop any rows where the merge didn’t work
merged_data = merged_data.dropna(subset=['operation_kind_id'])


In [23]:
# Assuming merged_data is your DataFrame and it already contains 'lat' and 'lon' columns

# Calculate new features (e.g., speed change)
merged_data['speed_change'] = merged_data['speed_gps'].diff().fillna(0)

# Haversine function to calculate distance between two lat/lon points
def haversine(lat1, lon1, lat2, lon2):
    # Convert latitude and longitude from degrees to radians
    lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2])
    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    return 6371 * 2 * np.arcsin(np.sqrt(a))  # Earth radius = 6371 km

# Create a new feature: distance traveled between consecutive GPS points
merged_data['distance_traveled'] = pd.Series(haversine(
    merged_data['lat'], merged_data['lon'],
    merged_data['lat'].shift(1), merged_data['lon'].shift(1)
)).fillna(0)

In [24]:
# Select the features (X) and the target (y)
X = merged_data[['speed_gps', 'accel_forward_nn', 'accel_braking_nn', 'accel_angular_nn', 'accel_vertical_nn', 'speed_change', 'distance_traveled']]
y = merged_data['operation_kind_id']

# Split into training and test sets for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [25]:
# Scale the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [32]:
# Print unique classes before filtering
print(np.unique(y_train))

# Filter out the invalid class
valid_classes = [0, 1, 2, 3]
mask = y_train.isin(valid_classes)
y_train_filtered = y_train[mask]
X_train_scaled_filtered = X_train_scaled[mask]

# Check unique classes again
print(np.unique(y_train_filtered))  # Should output [0, 1, 2, 3]


[0. 1. 2. 3. 5.]
[0. 1. 2. 3.]


In [33]:
y_train_filtered = y_train_filtered.astype(int)


In [35]:
# Train an XGBoost model
xgb_model = xgb.XGBClassifier(objective='multi:softmax', num_class=5, eval_metric='mlogloss')
xgb_model.fit(X_train_scaled_filtered, y_train_filtered)

In [36]:
# Make predictions on the test set
y_pred = xgb_model.predict(X_test_scaled)

# Calculate the weighted F1 score
f1 = f1_score(y_test, y_pred, average='weighted')
print(f'Weighted F1 Score: {f1}')


Weighted F1 Score: 0.4338022026306958


In [38]:
# Create new features for validation data
telemetry_validation['speed_change'] = telemetry_validation['speed_gps'].diff().fillna(0)

# Create a new feature: distance traveled between consecutive GPS points
telemetry_validation['distance_traveled'] = pd.Series(haversine(
    telemetry_validation['lat'], telemetry_validation['lon'],
    telemetry_validation['lat'].shift(1), telemetry_validation['lon'].shift(1)
)).fillna(0)

# Select the features
X_validation = telemetry_validation[['speed_gps', 'accel_forward_nn', 'accel_braking_nn', 'accel_angular_nn', 'accel_vertical_nn', 'speed_change', 'distance_traveled']]


In [40]:
# Make sure you have scaled your validation features using the same scaler
from sklearn.preprocessing import StandardScalerd

# Assuming you have a scaler already fitted on training data
# If not, fit it on the training data first
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # Fit and transform on training data

# Transform the validation data using the same scaler
X_validation_scaled = scaler.transform(X_validation)  # Transform only

# Make predictions on the scaled validation data
y_validation_pred = xgb_model.predict(X_validation_scaled)

# Prepare the submission file
submission = telemetry_validation[['create_dt', 'mdm_object_name']].copy()  # Use copy to avoid SettingWithCopyWarning
submission['operation_kind_id'] = y_validation_pred

# Ensure the shape is correct
expected_shape = (260111, 3)  # Replace with the actual number of rows if different
assert submission.shape == expected_shape, f"Expected shape {expected_shape}, but got {submission.shape}"

# Save the submission file as CSV
submission.to_csv('submission.csv', index=False)

print("Submission file saved as 'submission.csv'.")


Submission file saved as 'submission.csv'.
