## Importing necessary Libraries

In [None]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import numpy as np

## Reading the Excel file and showing it as a DataFrame

In [None]:
# Load the polymer property table from the Excel workbook at the path below.
origin_df = pd.read_excel(
    "/content/drive/MyDrive/2nd year/tri 3/AI/project/polymers.xlsx"
)

# Show the untouched table under its heading.
print("Original DataFrame:", origin_df, sep="\n")

Original DataFrame:
                            Material  Modulus of Elasticity (GPA)  \
0                              Epoxy                        2.410   
1                  Polycarbonate(PC)                        2.380   
2                          Nylon 6,6                        2.690   
3                           Phenolic                        3.795   
4               Polyester(thermoset)                        3.240   
5                               LDPE                        0.230   
6                               HDPE                        1.080   
7                  Polypropylene(PP)                        1.350   
8                    Polystyrene(PS)                        2.780   
9      Polytetrafluoroethylene(PTFE)                        0.480   
10          Polyvinyl chloride (PVC)                        3.280   
11  Polybutylene terephthalate (PBT)                        2.470   
12  Polyethylene terephthalate (PET)                        3.450   
13            

In [None]:
# Every column header of the loaded table, as a plain Python list.
column_names = list(origin_df.columns)

# The first header is the material-name column; everything after it is a property.
properties = column_names[1:]
print('Properties of Polymers', properties, sep="\n")

Properties of Polymers
['Modulus of Elasticity (GPA)', 'Coefficient of Thermal Expansion', 'Tensile Strength(MPA)', 'Thermal Conductivity (W/m-K)', 'Specific Heat(J/kg-K)', 'Density(g/cm^3)', 'Cost(Raw)$/kg']


## Getting user input and appending it to the original DataFrame

In [None]:
# Collect the target property values from the user. The "Material" entry labels
# the user's row so it can be told apart from the polymers in the table.
user_input_dict = {"Material": "user_properties"}  # Dictionary to store user inputs
for prop in properties:
    # Re-prompt until the answer is blank (skip) or a finite number. Storing a
    # float instead of the raw text keeps non-numeric input out of the table,
    # where it would otherwise only fail later, during normalization.
    while True:
        raw_value = input(f"{prop} (or press Enter to skip): ").strip()
        if raw_value == "":
            break  # Skipped: the property is left out of the dictionary.
        try:
            parsed_value = float(raw_value)
        except ValueError:
            print(f"'{raw_value}' is not a number, please try again.")
            continue
        if not np.isfinite(parsed_value):
            # "nan" / "inf" parse as floats but cannot be scaled or compared.
            print(f"'{raw_value}' is not a finite number, please try again.")
            continue
        user_input_dict[prop] = parsed_value
        break

# One-row DataFrame holding the user's values; skipped properties are absent.
user_input_df = pd.DataFrame([user_input_dict])

# Append the user's row below the original table; skipped properties become NaN.
new_df = pd.concat([origin_df, user_input_df], ignore_index=True)
print(new_df)

Modulus of Elasticity (GPA) (or press Enter to skip): 
Coefficient of Thermal Expansion (or press Enter to skip): 145
Tensile Strength(MPA) (or press Enter to skip): 90
Thermal Conductivity (W/m-K) (or press Enter to skip): 0.25
Specific Heat(J/kg-K) (or press Enter to skip): 1600
Density(g/cm^3) (or press Enter to skip): 1.1
Cost(Raw)$/kg (or press Enter to skip): 2.5
                            Material  Modulus of Elasticity (GPA)  \
0                              Epoxy                        2.410   
1                  Polycarbonate(PC)                        2.380   
2                          Nylon 6,6                        2.690   
3                           Phenolic                        3.795   
4               Polyester(thermoset)                        3.240   
5                               LDPE                        0.230   
6                               HDPE                        1.080   
7                  Polypropylene(PP)                        1.350   
8      

## Data cleaning

In [None]:
clean_df = new_df.dropna(axis="columns")  # Keep only the columns that have no NaN in any row

# Convert all numbers in columns (except the first column) to float values
def convert_to_float(value):
    """Return ``value`` as a float when possible, otherwise return it unchanged.

    Args:
        value: A single cell value (a number, a numeric string, or anything else).

    Returns:
        ``float(value)`` if the conversion succeeds; the original ``value``
        when ``float`` rejects it.
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        # ValueError: text that is not a number (e.g. "abc").
        # TypeError: objects float() cannot take at all (e.g. None); without
        # this they escape the handler instead of being passed through.
        return value

# Run every value column through convert_to_float, one column at a time.
# Series.map is used instead of DataFrame.applymap (deprecated since pandas 2.1),
# and the result is assembled into a new frame rather than written back through
# .iloc, so columns whose values all convert come out with a float dtype instead
# of staying object.
converted_values = clean_df.iloc[:, 1:].apply(lambda column: column.map(convert_to_float))
clean_df = pd.concat([clean_df.iloc[:, :1], converted_values], axis=1)

# Display the updated DataFrame
print("Cleaned DataFrame:")
print(clean_df)

Cleaned DataFrame:
                            Material Coefficient of Thermal Expansion  \
0                              Epoxy                             99.0   
1                  Polycarbonate(PC)                            122.0   
2                          Nylon 6,6                            144.0   
3                           Phenolic                            122.0   
4               Polyester(thermoset)                            140.0   
5                               LDPE                            290.0   
6                               HDPE                            152.0   
7                  Polypropylene(PP)                            163.0   
8                    Polystyrene(PS)                            120.0   
9      Polytetrafluoroethylene(PTFE)                            171.0   
10          Polyvinyl chloride (PVC)                            135.0   
11  Polybutylene terephthalate (PBT)                            139.5   
12  Polyethylene terephthalate (

## Normalizing the data to the range 0 to 1

In [57]:
material_names = clean_df.iloc[:, 0]  # Material names (first column of the cleaned table)

# The remaining columns are the property values. Cast to float up front so the
# arithmetic below never runs on object-dtype columns; a value that is not
# numeric raises here rather than part-way through the scaling.
numeric_df = clean_df.iloc[:, 1:].astype(float)

# Min-max scaling, column by column: (x - min) / (max - min). The user's row is
# part of clean_df, so it takes part in the min and max as well.
column_min = numeric_df.min()
column_range = numeric_df.max() - column_min
# A column whose values are all equal has a range of 0, which would turn the
# whole column (and every distance computed from it) into NaN. Dividing by 1
# instead maps such a column to 0 everywhere.
column_range = column_range.replace(0, 1)
normalized_df = (numeric_df - column_min) / column_range

# Combine the material names with the normalized numeric data
df = pd.concat([material_names, normalized_df], axis=1)

# Display the normalized DataFrame
print("\nNormalized DataFrame:")
print(df)


Normalized DataFrame:
                            Material Coefficient of Thermal Expansion  \
0                              Epoxy                              0.0   
1                  Polycarbonate(PC)                         0.120419   
2                          Nylon 6,6                         0.235602   
3                           Phenolic                         0.120419   
4               Polyester(thermoset)                          0.21466   
5                               LDPE                              1.0   
6                               HDPE                         0.277487   
7                  Polypropylene(PP)                         0.335079   
8                    Polystyrene(PS)                         0.109948   
9      Polytetrafluoroethylene(PTFE)                         0.376963   
10          Polyvinyl chloride (PVC)                         0.188482   
11  Polybutylene terephthalate (PBT)                         0.212042   
12  Polyethylene terephthala

## Calculating Euclidean distances

In [59]:
# Turn the normalized properties into a float matrix: the final row is the
# target row, every row above it is a candidate to compare against.
feature_matrix = normalized_df.to_numpy().astype(float)
target_row = feature_matrix[-1]
upper_rows = feature_matrix[:-1]

# Straight-line (L2) distance from each candidate row to the target row.
euclidean_distances = np.linalg.norm(upper_rows - target_row, axis=1)

# Show the raw distance vector.
print("Euclidean Distances:", euclidean_distances, sep="\n")

# Store the distances next to the rows they belong to; the target row itself
# has no distance, so it is padded with NaN.
df['Euclidean Distance'] = np.concatenate([euclidean_distances, [np.nan]])

# Show the table including the new column.
print("Updated DataFrame:", df, sep="\n")

Euclidean Distances:
[0.6455846  0.62721689 0.09187384 0.8232707  0.67170783 1.32766575
 1.08972107 0.86319589 0.77925802 1.53025064 0.72511133 0.5044659
 0.62601069 0.44395135]
Updated DataFrame:
                            Material Coefficient of Thermal Expansion  \
0                              Epoxy                              0.0   
1                  Polycarbonate(PC)                         0.120419   
2                          Nylon 6,6                         0.235602   
3                           Phenolic                         0.120419   
4               Polyester(thermoset)                          0.21466   
5                               LDPE                              1.0   
6                               HDPE                         0.277487   
7                  Polypropylene(PP)                         0.335079   
8                    Polystyrene(PS)                         0.109948   
9      Polytetrafluoroethylene(PTFE)                         0.376963   


## Model evaluation

In [61]:
# Order the rows by the final column of df, ascending, with NaN entries placed
# first (the row labelled user_properties has a NaN distance).
distance_column = df.columns[-1]
sorted_df = df.sort_values(by=distance_column, na_position='first')

# Show the full ordering.
print("Sorted DataFrame:", sorted_df, sep="\n")

# Keep the first four rows of that ordering.
top_four_df = sorted_df.iloc[:4]

# Show the selection.
print("Selected Polymers:", top_four_df, sep="\n")

Sorted DataFrame:
                            Material Coefficient of Thermal Expansion  \
14                   user_properties                         0.240838   
2                          Nylon 6,6                         0.235602   
13                              PMMA                         0.141361   
11  Polybutylene terephthalate (PBT)                         0.212042   
12  Polyethylene terephthalate (PET)                         0.094241   
1                  Polycarbonate(PC)                         0.120419   
0                              Epoxy                              0.0   
4               Polyester(thermoset)                          0.21466   
10          Polyvinyl chloride (PVC)                         0.188482   
8                    Polystyrene(PS)                         0.109948   
3                           Phenolic                         0.120419   
7                  Polypropylene(PP)                         0.335079   
6                               H