In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

In [2]:
# Read the training table from the CSV file in the working directory.
train_data = pd.read_csv(filepath_or_buffer='main_dataset.csv')

In [3]:
# Floor `Id` at 1e-18: anything smaller (including zero and negative values)
# is raised to the floor so that log10 below is always defined.
# Missing values (NaN) are left as NaN.
train_data['Id'] = train_data['Id'].clip(lower=1e-18)

# Regression target: base-10 logarithm of the floored `Id` column.
train_data['Log_Id'] = np.log10(train_data['Id'])


In [4]:
# Inputs are six columns of the training frame; the target is `Log_Id`.
# NOTE(review): the frame also carries an 'Nc' column (see the columns
# printout at the end of the notebook) that is not selected here -- confirm
# this is intentional.
X = train_data.loc[:, ['Tsi', 'Tox', 'Nd', 'Ns', 'Vds', 'Vgs']]
y = train_data.loc[:, 'Log_Id']


# Expand the inputs with every polynomial term up to degree 3 (no constant
# column), then standardise each expanded feature.
# NOTE(review): the scaler is fitted on all rows of `X_poly`; if a hold-out
# split is taken afterwards, its statistics include the held-out rows.
poly = PolynomialFeatures(degree=3, include_bias=False).fit(X)
X_poly = poly.transform(X)

scaler = StandardScaler().fit(X_poly)
X_scaled = scaler.transform(X_poly)

In [5]:

# Hold out 30% of the rows for testing; the fixed seed makes the partition
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.3,
    random_state=42,
)

# Random forest with 200 trees, seeded for repeatability; n_jobs=-1 asks
# scikit-learn to use all available processors while fitting.
model = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
    n_jobs=-1,
)
model.fit(X_train, y_train)


KeyboardInterrupt: 

In [12]:

# Score the fitted forest on the held-out split. For a scikit-learn
# regressor, `score` returns the coefficient of determination (R^2).
test_score = model.score(X=X_test, y=y_test)
print(f"Random Forest Test Score: {test_score}")
 

Random Forest Test Score: 0.9990852046513249


In [13]:

# Read the TCAD table from the CSV file in the working directory.
tcad_data = pd.read_csv('refined_test_dataset.csv')

# Floor `Id` at 1e-18: smaller values (including zero and negatives) are
# raised to the floor; NaN entries are left as NaN.
tcad_data['Id'] = tcad_data['Id'].clip(lower=1e-18)

In [14]:
# Persist the fitted random forest to disk. `joblib` is imported with the
# other dependencies in the notebook's first cell, so it is not re-imported
# here. `joblib.dump` returns the list of file names it wrote, which the
# notebook displays as this cell's output.
# NOTE(review): only the estimator is saved. The fitted `poly` and `scaler`
# objects that produced its input features are not written here -- confirm
# they are persisted elsewhere if the model is to be reloaded in a new session.
joblib.dump(model, 'randomForest_model.joblib')

['randomForest_model.joblib']

In [27]:
# Show the column labels of the training frame. For a DataFrame, `keys()`
# returns the same Index object as `.columns`.
print(train_data.keys())

Index(['Tsi', 'Tox', 'Ns', 'Nc', 'Nd', 'Vds', 'Vgs', 'Id', 'Log_Id'], dtype='object')
