# **Importing Libraries**

**Basic Libraries**

In [35]:
import pandas as pd
import numpy as np

**Libraries for visualisation and analysis**

In [36]:
#libraries used for statistical graphics in python
import seaborn as sb
import matplotlib.pyplot as plt

**Data Preprocessing Libraries**

In [37]:
#Libraries used for data preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# **Importing Models**

In [38]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

#for model evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# **Data Analysis**

**Reading the Data**

In [41]:
Alloy_data_path = r"https://raw.githubusercontent.com/DarshanGoodGuy/IITISOC25/refs/heads/main/final_dataset.csv?token=GHSAT0AAAAAADGBXQXYNI6DOV7PV4XYQ7AW2CYDWSA"
Alloy_data = pd.read_csv(Alloy_data_path)

**Analysing the Data**

In [42]:
Alloy_data.head()

Unnamed: 0,FORMULA,Co,Cr,Fe,Ni,Mn,Nb,Ti,Al,C,...,PROPERTY: Microstructure,PROPERTY: Processing method,PROPERTY: BCC/FCC/other,PROPERTY: Calculated Density (g/cm$^3$),PROPERTY: Type of test,PROPERTY: Test temperature ($^\circ$C),PROPERTY: YS (MPa),PROPERTY: UTS (MPa),PROPERTY: Elongation (%),PROPERTY: Calculated Young modulus (GPa)
0,Co1 Cr1 Fe1 Ni1,26.138193,23.061468,24.768591,26.031747,,,,,,...,FCC,WROUGHT,FCC,8.2,T,25.0,274.0,708.0,39.0,226.0
1,Co1 Cr1 Mn1 Ni1,26.243766,23.154613,,26.13689,24.464731,,,,,...,FCC,WROUGHT,FCC,8.1,T,25.0,282.0,694.0,44.0,222.0
2,Co1 Cr1 Ni1,34.743724,30.654043,,34.602233,,,,,,...,FCC,WROUGHT,FCC,8.3,T,25.0,300.0,860.0,60.0,231.0
3,Co1 Fe1 Mn1 Ni1,25.801523,,24.449562,25.696448,24.052467,,,,,...,FCC,WROUGHT,FCC,8.2,T,25.0,170.0,550.0,41.0,204.0
4,Co1 Fe1 Ni1,33.972825,,32.192701,33.834474,,,,,,...,FCC,WROUGHT,FCC,8.5,T,25.0,211.0,513.0,31.0,207.0


In [None]:
Alloy_data.shape

In [None]:
Alloy_data.info()

**Filling the missing values with mean**

In [43]:
Alloy_data.isna().sum()

Unnamed: 0,0
FORMULA,0
Co,559
Cr,471
Fe,520
Ni,499
Mn,816
Nb,500
Ti,461
Al,604
C,929


In [50]:
Alloy_data.iloc[:, -4:] = Alloy_data.iloc[:, -4:].fillna(Alloy_data.iloc[:, -4:].mean())
Alloy_data.head()

Unnamed: 0,FORMULA,Co,Cr,Fe,Ni,Mn,Nb,Ti,Al,C,...,PROPERTY: Microstructure,PROPERTY: Processing method,PROPERTY: BCC/FCC/other,PROPERTY: Calculated Density (g/cm$^3$),PROPERTY: Type of test,PROPERTY: Test temperature ($^\circ$C),PROPERTY: YS (MPa),PROPERTY: UTS (MPa),PROPERTY: Elongation (%),PROPERTY: Calculated Young modulus (GPa)
0,Co1 Cr1 Fe1 Ni1,26.138193,23.061468,24.768591,26.031747,,,,,,...,FCC,WROUGHT,FCC,8.2,T,25.0,274.0,708.0,39.0,226.0
1,Co1 Cr1 Mn1 Ni1,26.243766,23.154613,,26.13689,24.464731,,,,,...,FCC,WROUGHT,FCC,8.1,T,25.0,282.0,694.0,44.0,222.0
2,Co1 Cr1 Ni1,34.743724,30.654043,,34.602233,,,,,,...,FCC,WROUGHT,FCC,8.3,T,25.0,300.0,860.0,60.0,231.0
3,Co1 Fe1 Mn1 Ni1,25.801523,,24.449562,25.696448,24.052467,,,,,...,FCC,WROUGHT,FCC,8.2,T,25.0,170.0,550.0,41.0,204.0
4,Co1 Fe1 Ni1,33.972825,,32.192701,33.834474,,,,,,...,FCC,WROUGHT,FCC,8.5,T,25.0,211.0,513.0,31.0,207.0


In [51]:
Alloy_data.isna().sum()

Unnamed: 0,0
FORMULA,0
Co,559
Cr,471
Fe,520
Ni,499
Mn,816
Nb,500
Ti,461
Al,604
C,929


# **Data Standardisation**

In [52]:
Alloy_data.shape

(948, 37)

In [57]:
#Assigning Feature and Target variables
X = Alloy_data.iloc[:,1:27] #Feature Columns
y = Alloy_data.iloc[:,27:38]  #Target Columns

In [58]:
X.head()

Unnamed: 0,Co,Cr,Fe,Ni,Mn,Nb,Ti,Al,C,Mo,...,Zn,Ta,Zr,Hf,W,Re,Ca,Y,Pd,Sc
0,26.138193,23.061468,24.768591,26.031747,,,,,,,...,,,,,,,,,,
1,26.243766,23.154613,,26.13689,24.464731,,,,,,...,,,,,,,,,,
2,34.743724,30.654043,,34.602233,,,,,,,...,,,,,,,,,,
3,25.801523,,24.449562,25.696448,24.052467,,,,,,...,,,,,,,,,,
4,33.972825,,32.192701,33.834474,,,,,,,...,,,,,,,,,,


In [55]:
y.head()

Unnamed: 0,PROPERTY: Microstructure,PROPERTY: Processing method,PROPERTY: BCC/FCC/other,PROPERTY: Calculated Density (g/cm$^3$),PROPERTY: Type of test,PROPERTY: Test temperature ($^\circ$C),PROPERTY: YS (MPa),PROPERTY: UTS (MPa),PROPERTY: Elongation (%),PROPERTY: Calculated Young modulus (GPa)
0,FCC,WROUGHT,FCC,8.2,T,25.0,274.0,708.0,39.0,226.0
1,FCC,WROUGHT,FCC,8.1,T,25.0,282.0,694.0,44.0,222.0
2,FCC,WROUGHT,FCC,8.3,T,25.0,300.0,860.0,60.0,231.0
3,FCC,WROUGHT,FCC,8.2,T,25.0,170.0,550.0,41.0,204.0
4,FCC,WROUGHT,FCC,8.5,T,25.0,211.0,513.0,31.0,207.0


**Scaling of data**

In [59]:
sc = MinMaxScaler()

X_scaled = pd.DataFrame(sc.fit_transform(X),columns=X.columns);
y_scaled = pd.DataFrame(sc.fit_transform(y),columns=y.columns);

ValueError: could not convert string to float: 'FCC'