# Regression on Mohs Hardness 

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,StandardScaler

### Get Training Dataset

In [2]:
# Read the dataset CSV into a DataFrame and preview its first rows.
csv_path = 'data/df_selected.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,id,allelectrons_Average,density_Average,val_e_Average,atomicweight_Average,ionenergy_Average,el_neg_chi_Average,Hardness
0,0,10.0,0.91457,4.8,20.612526,11.0881,2.766,6.0
1,1,10.0,0.7176,4.8,20.298893,12.04083,2.755,6.5
2,2,15.6,1.50633,5.6,33.739258,12.0863,2.828,2.5
3,3,10.0,0.78937,4.8,20.213349,10.9485,2.648,6.0
4,4,11.6,1.86481,4.8,24.988133,11.82448,2.766,6.0


Inputs & Outputs to the model

In [3]:
# Model inputs: the six averaged property columns used as features.
feature_columns = [
    'allelectrons_Average',
    'density_Average',
    'val_e_Average',
    'atomicweight_Average',
    'ionenergy_Average',
    'el_neg_chi_Average',
]
X = df[feature_columns]

# Regression target, kept as a 2-D column array of shape (n_samples, 1).
target_frame = df[['Hardness']]
Y = target_frame.values.reshape(-1, 1)

In [4]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Fit the min-max scaler on the training features only, then apply that same
# mapping to both splits so the test rows do not influence the scaling.
scaler_X = MinMaxScaler().fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

### Min-max scale the target (the features are scaled in the cell above)

In [6]:
# Fit the target scaler on the training targets only; it is reused later to
# map predictions back to the original Hardness scale.
scaler_y = MinMaxScaler().fit(y_train)
y_train_scaled = scaler_y.transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

Convert data to PyTorch tensors

In [7]:
# Convert every scaled array to a float32 tensor in one pass; the order of the
# names on the left matches the order of the arrays on the right.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(scaled_array, dtype=torch.float32)
    for scaled_array in (
        X_train_scaled,
        y_train_scaled,
        X_test_scaled,
        y_test_scaled,
    )
)

### Define the Models

In [8]:
#------------------DeepRegression Model--------------------#
class DeepRegressionModel(nn.Module):
    """Feed-forward regressor with two ReLU hidden layers and one output.

    Args:
        input_size: Number of input features per sample.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        drop: Dropout probability applied to the second hidden activation.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2,drop):
        super(DeepRegressionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.dropout = nn.Dropout(drop)
        self.fc3 = nn.Linear(hidden_size2, 1)

    def forward(self, x):
        """Return one prediction per row of ``x`` (last dimension is 1)."""
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        # Apply the dropout layer built in __init__; previously it was created
        # but never called, so `drop` had no effect. nn.Dropout is the identity
        # in eval mode, so inference results are unchanged.
        x = self.dropout(x)
        x = self.fc3(x)
        return x
    
#------------------Linear Regression Model--------------------#
class LinearRegressionModel(nn.Module):
    """Single affine layer mapping ``input_size`` features to one output."""

    def __init__(self, input_size):
        super().__init__()
        # The attribute name `linear` fixes the state_dict keys
        # ('linear.weight', 'linear.bias').
        self.linear = nn.Linear(in_features=input_size, out_features=1)

    def forward(self, x):
        """Return the affine projection of ``x``; the last dimension is 1."""
        prediction = self.linear(x)
        return prediction

Instantiate the models

In [9]:
# Seed PyTorch's global RNG before any layer is created so that weight
# initialisation (and any later random draws) are identical on every
# Restart & Run All. The value matches the random_state used for the split.
torch.manual_seed(42)

# Both models take one input per feature column.
input_size = X_train.shape[1]

# Deep
hidden_size1 = 64
hidden_size2 = 32
drop = 0.1  # probability handed to the model's nn.Dropout layer
deep_model = DeepRegressionModel(input_size, hidden_size1, hidden_size2, drop)

# Linear
linear_model = LinearRegressionModel(input_size)

In [10]:
# Select the model to train and evaluate: `linear_model` or `deep_model`.
# The optimizer, training loop, evaluation and save cells below all act on
# whatever is bound to `model` here.
model = linear_model

In [11]:

# Mean-squared-error objective for the regression target.
criterion = nn.MSELoss()

# Adam updates the parameters of whichever model is currently bound to `model`.
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

### Train

In [12]:
num_epochs = 10000

# Full-batch training: each epoch is one optimizer step computed on the whole
# training tensor. Nothing inside the loop leaves training mode, so it is
# enabled once up front.
model.train()

for epoch in range(num_epochs):
    # Forward pass and loss on the entire training set.
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Clear the gradients left by the previous step, backpropagate, update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report on every 1000th epoch (counting from 1).
    if epoch % 1000 == 999:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1000/10000], Loss: 0.0290
Epoch [2000/10000], Loss: 0.0283
Epoch [3000/10000], Loss: 0.0282
Epoch [4000/10000], Loss: 0.0280
Epoch [5000/10000], Loss: 0.0279
Epoch [6000/10000], Loss: 0.0278
Epoch [7000/10000], Loss: 0.0277
Epoch [8000/10000], Loss: 0.0277
Epoch [9000/10000], Loss: 0.0277
Epoch [10000/10000], Loss: 0.0277


In [13]:
# Evaluation: predict on the held-out split with gradient tracking disabled.
model.eval()
with torch.no_grad():
    y_pred_tensor = model(X_test_tensor)

# Undo the target scaling so the metrics are on the original Hardness scale.
y_pred = scaler_y.inverse_transform(y_pred_tensor.numpy())
y_true = scaler_y.inverse_transform(y_test_tensor.numpy())

mse = mean_squared_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

Mean Squared Error: 2.1708645820617676
R-squared: 0.2013219015452523


In [14]:
# Persist only the learned parameters (the state_dict), not the module object.
trained_weights = model.state_dict()
torch.save(trained_weights, 'model_weights.pth')