In [1]:
import pandas as pd
import numpy as np

# Load the dataset
data = pd.read_csv('2STAGEOPAMP_DATASET.csv')

# Display the first few rows and data info
print(data.head())
print(data.info())
print(data.describe())

# Check for missing values
print("Missing values:", data.isnull().sum())

# Handle infinite values, if any
data = data.replace([np.inf, -np.inf], np.nan)
data = data.dropna()  #remove null containing rows (simplest way)


        Wi1       Wi2       Wi3       Wi5  DC Gain          ft         f3  \
0  0.000001  0.000021  0.000091  0.000060  20.0438   6080000.0   598810.0   
1  0.000091  0.000100  0.000011  0.000007  20.0816  12100000.0  1200000.0   
2  0.000100  0.000091  0.000011  0.000007  20.1075  13200000.0  1310000.0   
3  0.000081  0.000100  0.000011  0.000007  20.4283  12100000.0  1140000.0   
4  0.000100  0.000081  0.000011  0.000008  20.4548  14700000.0  1390000.0   

   Vcm     Pdiss       Is4       Gm6       Gm4       Wi4  Asp_1  Asp_2  Asp_3  \
0  1.6  0.000068  0.000011  0.000329  0.000252  0.000027    2.0   42.0  182.0   
1  1.6  0.000066  0.000012  0.000655  0.000554  0.000118  182.0  200.0   22.0   
2  1.6  0.000069  0.000012  0.000716  0.000529  0.000123  200.0  182.0   22.0   
3  1.6  0.000066  0.000011  0.000652  0.000554  0.000118  162.0  200.0   22.0   
4  1.6  0.000072  0.000012  0.000795  0.000499  0.000129  200.0  162.0   22.0   

   Asp_4  Asp_5   Abs_Gain     Delay  
0   54.8  1

In [5]:
# Define features (X) and target variables (y)
y = data[['Wi1', 'Wi2', 'Wi3', 'Wi5', 'Wi4']]  # Transistor sizes and aspect ratios
X = data[['DC Gain', 'ft', 'f3', 'Pdiss']] # Performance metrics

# Split data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

y_train=y_train*10**6
y_test=y_test*10**6

In [6]:
from sklearn.ensemble import RandomForestRegressor

# Initialize and train the Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42) # You can tune n_estimators
model.fit(X_train, y_train)


In [7]:
from sklearn.metrics import mean_squared_error, r2_score

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

#If you want to evaluate each output seperately
for i,col in enumerate(y.columns):
  mse = mean_squared_error(y_test.iloc[:,i], y_pred[:,i])
  r2 = r2_score(y_test.iloc[:,i], y_pred[:,i])
  print(f"col: {col}, Mean Squared Error: {mse}, R-squared: {r2}")


Mean Squared Error: 205.7797702528851
R-squared: 0.8093398160131882
col: Wi1, Mean Squared Error: 187.1520163814181, R-squared: 0.8138349928021337
col: Wi2, Mean Squared Error: 147.2255349633252, R-squared: 0.8018922312777039
col: Wi3, Mean Squared Error: 174.49286650366747, R-squared: 0.7989686936874084
col: Wi5, Mean Squared Error: 104.62206945513442, R-squared: 0.6817547839884303
col: Wi4, Mean Squared Error: 415.40636396088024, R-squared: 0.9502483783102651


In [8]:
#predict
single_sample = X_test[0].reshape(1, -1)
prediction = model.predict(single_sample)
print("prediction",prediction)


prediction [[35.56  20.39  51.63  58.312 61.258]]


In [9]:
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=RandomForestRegressor(random_state=42),
                           param_grid=param_grid,
                           cv=3,  # Number of cross-validation folds
                           scoring='neg_mean_squared_error',
                           verbose=2,
                           n_jobs=-1)

# Perform grid search
grid_search.fit(X_train, y_train)

# Print the best parameters
print("Best parameters:", grid_search.best_params_)

# Get the best model
best_model = grid_search.best_estimator_

# Evaluate the best model
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (Best Model): {mse}")
print(f"R-squared (Best Model): {r2}")

#If you want to evaluate each output seperately
for i,col in enumerate(y.columns):
  mse = mean_squared_error(y_test.iloc[:,i], y_pred[:,i])
  r2 = r2_score(y_test.iloc[:,i], y_pred[:,i])
  print(f"col: {col}, Mean Squared Error: {mse}, R-squared: {r2}")


Fitting 3 folds for each of 108 candidates, totalling 324 fits
Best parameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Mean Squared Error (Best Model): 210.2409656174315
R-squared (Best Model): 0.8058116480767726
col: Wi1, Mean Squared Error: 187.25954740899215, R-squared: 0.8137280288756455
col: Wi2, Mean Squared Error: 150.9393838401589, R-squared: 0.7968948487615215
col: Wi3, Mean Squared Error: 178.95261884168707, R-squared: 0.7938306622233894
col: Wi5, Mean Squared Error: 106.57914860416199, R-squared: 0.6758016320408757
col: Wi4, Mean Squared Error: 427.47412939215735, R-squared: 0.9488030684824313


In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Load the dataset
data = pd.read_csv('2STAGEOPAMP_DATASET.csv')

# Preprocessing: Handling missing values
data = data.replace([np.inf, -np.inf], np.nan)
data = data.dropna()

# Define features (X) and target variables (y)
y = data[['Wi1', 'Wi2', 'Wi3', 'Wi5', 'Wi4','Is4']]  # Transistor sizes and aspect ratios
X = data[['DC Gain', 'ft', 'Pdiss', 'Vcm']] # Performance metrics

#Split into 60/20/20.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42) #40% will be split again
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Scale
# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on validation set
y_val_pred = model.predict(X_val)
mse_val = mean_squared_error(y_val, y_val_pred)
r2_val = r2_score(y_val, y_val_pred)

print("Validation Set Evaluation:")
print(f"Mean Squared Error: {mse_val}")
print(f"R-squared: {r2_val}")

# Evaluate on test set
y_test_pred = model.predict(X_test)
mse_test = mean_squared_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)

print("\nTest Set Evaluation:")
print(f"Mean Squared Error: {mse_test}")
print(f"R-squared: {r2_test}")


Validation Set Evaluation:
Mean Squared Error: 2.0006009105814592e-10
R-squared: 0.796395754197316

Test Set Evaluation:
Mean Squared Error: 2.132567418622169e-10
R-squared: 0.7982508343825138


In [11]:
y_test

Unnamed: 0,Wi1,Wi2,Wi3,Wi5,Wi4,Is4
1027,0.000011,0.000061,0.000041,0.000040,0.000276,0.000010
1480,0.000100,0.000091,0.000081,0.000045,0.000352,0.000012
243,0.000061,0.000100,0.000021,0.000013,0.000192,0.000012
1998,0.000081,0.000081,0.000100,0.000058,0.000323,0.000010
1419,0.000001,0.000071,0.000100,0.000064,0.000177,0.000009
...,...,...,...,...,...,...
1744,0.000100,0.000031,0.000051,0.000065,0.000261,0.000010
1532,0.000041,0.000061,0.000061,0.000048,0.000320,0.000012
903,0.000100,0.000041,0.000100,0.000068,0.000207,0.000010
73,0.000041,0.000091,0.000011,0.000007,0.000116,0.000011


In [12]:
pd.DataFrame(y_test_pred)

Unnamed: 0,0,1,2,3,4,5
0,3.982000e-05,0.000071,0.000055,0.000043,0.000300,0.000010
1,8.343000e-05,0.000084,0.000086,0.000048,0.000339,0.000012
2,5.647000e-05,0.000090,0.000031,0.000019,0.000182,0.000012
3,7.524000e-05,0.000086,0.000093,0.000055,0.000332,0.000010
4,1.000000e-06,0.000089,0.000084,0.000053,0.000196,0.000009
...,...,...,...,...,...,...
404,7.108000e-05,0.000036,0.000048,0.000059,0.000276,0.000010
405,4.970000e-05,0.000062,0.000059,0.000047,0.000324,0.000012
406,8.079000e-05,0.000046,0.000094,0.000064,0.000218,0.000010
407,4.369000e-05,0.000084,0.000017,0.000011,0.000112,0.000011


In [13]:
from xgboost import XGBRegressor
xgb_model = XGBRegressor(n_estimators=100, max_depth=10, learning_rate=0.05, random_state=42)
xgb_model.fit(X_train, y_train)

In [14]:
y_test_pred2 = xgb_model.predict(X_test)
mse_test2 = mean_squared_error(y_test, y_test_pred2)
r2_test2 = r2_score(y_test, y_test_pred2)

In [15]:
print("\nTest Set Evaluation:")
print(f"Mean Squared Error: {mse_test}")
print(f"R-squared: {r2_test}")


Test Set Evaluation:
Mean Squared Error: 2.132567418622169e-10
R-squared: 0.7982508343825138


In [16]:
#'DC Gain', 'ft', 'Pdiss', 'Vcm'
chat = [[60,100,2,1.2]]
y_test_pred_chat = model.predict(chat)




In [17]:
pd.DataFrame(y_test_pred_chat,columns=y_test.columns)

Unnamed: 0,Wi1,Wi2,Wi3,Wi5,Wi4,Is4
0,3.9e-05,2e-05,1.5e-05,1.8e-05,4.2e-05,1e-05


In [18]:
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score

cv = ShuffleSplit(n_splits=5, test_size=0.2,random_state=0)

cross_val_score(RandomForestRegressor(),X,y,cv = cv)

array([0.80108764, 0.83171237, 0.81195593, 0.79706798, 0.81619047])