In [1]:
import os
from getpass import getpass

import numpy as np
import pandas as pd
from neo4j import GraphDatabase
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

class Neo4jConnection:
    """Small wrapper around a Neo4j driver that runs each query in its own session.

    The object can be used as a context manager so the driver is closed even
    when the body raises::

        with Neo4jConnection(uri, user, pwd) as conn:
            records = conn.query("MATCH (n) RETURN n LIMIT 1")
    """

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` using ``user``/``pwd`` as credentials.

        A failure to create the driver is printed, not raised; the driver is
        then left unset and ``query`` will refuse to run.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never suppress errors."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` and return its records as a list.

        Args:
            query: Cypher text passed to ``session.run``.
            parameters: Optional query parameters passed to ``session.run``.
            db: Optional database name; when ``None`` the driver's default
                session is used.

        Returns:
            A list of the records produced by the query, or ``None`` when
            opening the session or running the query failed (the error is
            printed).

        Raises:
            AssertionError: If the driver was never created.
        """
        # Raised explicitly (rather than with an `assert` statement) so the
        # check is still performed when Python runs with -O.
        if self.__driver is None:
            raise AssertionError("Driver not initialized!")
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            # Materialise the result before the session is closed.
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response


# Connection settings are read from the environment so that no credential is
# stored in the notebook. The password falls back to an interactive prompt.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
userName = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

conn = Neo4jConnection(uri=uri, user=userName, pwd=password)

# One row per (company, construction) pair with the construction's properties.
query_string = """
MATCH (company:Company)-[:HAS_CONSTRUCTION]->(construction:Construction)
RETURN construction.construction AS construction,
       construction.diameter AS diameter,
       construction.linear_density AS linear_density,
       construction.breaking_load AS breaking_load,
       construction.number_of_layers AS number_of_layers,
       construction.total_strands AS total_strands,
       construction.core_diameter AS core_diameter,
       construction.outer_strand_diameter AS outer_strand_diameter,
       company.name AS companyName
"""
# Close the driver even if the query raises.
try:
    data = conn.query(query_string)
finally:
    conn.close()

# Neo4jConnection.query prints the error and returns None on failure; stop here
# with a clear message instead of failing later while iterating over None.
if data is None:
    raise RuntimeError("Neo4j query failed; see the error printed above.")

data_list = [dict(record) for record in data]
df = pd.DataFrame(data_list)

In [None]:
# Show only the first few raw records instead of dumping the whole result list.
data[:5]

In [3]:
# Preview the first rows of the frame built from the Neo4j records.
df.head()

Unnamed: 0,construction,diameter,linear_density,breaking_load,number_of_layers,total_strands,core_diameter,outer_strand_diameter,companyName
0,5x0.30,0.81,970.0,1050.0,1,5,0.3,0.3,Bekaert
1,2x0.30,0.6,405.0,445.0,1,2,0.3,0.3,Bekaert
2,2x0.27,0.54,361.0,397.0,1,2,0.27,0.27,Bekaert
3,2+2x0.30,0.78,825.0,890.0,2,6,0.3,0.3,Bekaert
4,2+4x0.22,0.68,670.0,720.0,2,2,0.22,0.22,Bekaert


In [4]:
# Preview the last rows of the same frame.
df.tail()

Unnamed: 0,construction,diameter,linear_density,breaking_load,number_of_layers,total_strands,core_diameter,outer_strand_diameter,companyName
88,4x4x0.20,1.24,4390.0,1180.0,1,16,0.2,0.2,Xingda
89,3+8x0.35,1.44,8440.0,2865.0,2,11,0.35,0.35,Xingda
90,3x4x0.22,1.16,3950.0,950.0,1,12,0.22,0.22,Xingda
91,5x0.38,1.11,4600.0,1120.0,1,5,0.38,0.38,Xingda
92,3x3x0.20,0.92,2420.0,630.0,1,9,0.2,0.2,Xingda


In [5]:
# Keep an independent copy of the Neo4j-derived frame; `df` itself is rebound
# to the Excel data in the "Prediction of Total Diameter" cell.
df_copy = df.copy()

In [6]:
# Predictor columns for the single-output model; the target is breaking_load.
features = [
    'diameter',
    'linear_density',
    'number_of_layers',
    'total_strands',
    'core_diameter',
    'outer_strand_diameter',
]

X, y = df[features], df['breaking_load']

# Hold out 30% of the rows for evaluation, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=30,
)

# Random forest with 80 trees, seeded so repeated runs build the same forest.
model = RandomForestRegressor(n_estimators=80, random_state=42)
model.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# List every held-out sample next to its prediction.
for actual, predicted in zip(y_test, y_pred):
    print(f'Actual Breaking Load (N): {actual}, Predicted: {predicted:.2f}')

Mean Squared Error: 98108.46515159974
Actual Breaking Load (N): 980.0, Predicted: 1060.64
Actual Breaking Load (N): 610.0, Predicted: 604.38
Actual Breaking Load (N): 1305.0, Predicted: 1131.11
Actual Breaking Load (N): 1400.0, Predicted: 1799.39
Actual Breaking Load (N): 660.0, Predicted: 601.75
Actual Breaking Load (N): 2900.0, Predicted: 3690.89
Actual Breaking Load (N): 1050.0, Predicted: 941.64
Actual Breaking Load (N): 2485.0, Predicted: 3258.71
Actual Breaking Load (N): 1310.0, Predicted: 1204.56
Actual Breaking Load (N): 1510.0, Predicted: 1255.56
Actual Breaking Load (N): 1150.0, Predicted: 1200.28
Actual Breaking Load (N): 445.0, Predicted: 457.21
Actual Breaking Load (N): 575.0, Predicted: 484.59
Actual Breaking Load (N): 1185.0, Predicted: 1188.50
Actual Breaking Load (N): 720.0, Predicted: 565.05
Actual Breaking Load (N): 1980.0, Predicted: 1847.03
Actual Breaking Load (N): 445.0, Predicted: 439.93
Actual Breaking Load (N): 1236.0, Predicted: 1111.64
Actual Breaking Load (

### Multiple Input - Single Output
#### Input:
- a. Diameter
- b. Linear Density
- c. Number of Layers
- d. Total Strands
- e. Core Diameter
- f. Outer Strand Diameter

#### Output:
- a. Predicted Breaking Load

In [7]:
# Feature values of one construction to score with the breaking-load model.
new_construction_features = dict(
    diameter=1.14,
    linear_density=3200,
    number_of_layers=1,
    total_strands=8,
    core_diameter=0.25,
    outer_strand_diameter=0.25,
)

# The dict's key order is the column order used to build the one-row frame.
features = list(new_construction_features)
new_construction_df = pd.DataFrame([new_construction_features], columns=features)

# Predict and print the result next to the known value for comparison.
predicted_breakingLoad = model.predict(new_construction_df)
print("Actual Breaking Load: 870")
print(f"Predicted Breaking Load: {predicted_breakingLoad[0]}")

Actual Breaking Load: 870
Predicted Breaking Load: 943.5


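### Multiple Input - Multiple Output
#### Input:
- a. Total Strands
- b. Number of Layers
- c. Core Diameter
- d. Outer Strand Diameter
- e. Diameter

#### Output:
- a. Predicted Breaking Load
- b. Predicted Density (linear density)

Note: this model does not predict the diameter. The diameter is an input; the value used below (0.87606161) is the estimate printed by the linear-regression model in the "Prediction of Total Diameter" section.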

In [8]:
# Five predictors and two targets: one forest predicts both targets at once.
features_ = [
    'total_strands',
    'number_of_layers',
    'core_diameter',
    'outer_strand_diameter',
    'diameter',
]
targets_ = ['breaking_load', 'linear_density']

# Work on plain NumPy arrays rather than DataFrames.
X = df[features_].to_numpy()
Y = df[targets_].to_numpy()

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

model_ = RandomForestRegressor(n_estimators=80, random_state=42)
model_.fit(X_train, Y_train)

# One sample, in the same order as features_:
# [total_strands, number_of_layers, core_diameter, outer_strand_diameter, diameter]
new_construction = np.array([[8, 1, 0.25, 0.25, 0.87606161]])

predicted_properties = model_.predict(new_construction)

# Row 0 holds the two predictions in targets_ order.
predicted_row = predicted_properties[0]
print(f"Predicted Properties: Breaking Load: {predicted_row[0]}, Density: {predicted_row[1]}")
print("Actual Properties: Breaking Load:870 , Density:3200 ")

Predicted Properties: Breaking Load: 982.4605654761906, Density: 2846.44375
Actual Properties: Breaking Load:870 , Density:3200 


# Prediction of Total Diameter
### Model Trained on
- Total Strands
- Total Layers
- Core Diameter
- Outer Strand Diameter

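##### Manual Test Set
Columns: construction, diameter, breaking load, linear density, number of layers, total strands, core diameter, outer strand diameter, company (trailing columns are missing on some rows).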
- 4x2x0.25 	1.14	870	3200	1	8	0.25	0.25	Xingda
- 5x0.38 	1.22	1190	4530	1	5	0.38
- 3x0.175+6x0.32	1.04	1462.5	4420	2	9	0.175	0.32

In [9]:
# Spreadsheet with the training data. The default is the original local path;
# set the PREDICTION_XLSX environment variable to run the notebook elsewhere.
PREDICTION_XLSX = os.environ.get(
    "PREDICTION_XLSX",
    'C://Users//ashis//OneDrive//Desktop//Bekaert//Innovation//structred data//prediction.xlsx',
)

# NOTE: this cell rebinds `df` (previously the Neo4j frame) and `model`
# (previously the random forest). Re-running the earlier prediction cells after
# this one would use these objects instead of the originals.
df = pd.read_excel(PREDICTION_XLSX)

# Predict the overall diameter from the four construction descriptors.
X = df[['total_strands', 'number_of_layers', 'core_diameter', 'outer_strand_diameter']]
y = df['diameter']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = LinearRegression()

model.fit(X_train, y_train)

# Evaluate on the held-out 30% of the rows.
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)

# Coefficients are in the same order as the columns of X.
print('Model Coefficients:', model.coef_)

results_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})

print(results_df)

# Single manual test case (construction 4x2x0.25).
new_data_dict = {
    'total_strands': [8],
    'number_of_layers': [1],
    'core_diameter': [0.25],
    'outer_strand_diameter': [0.25]
}

new_data = pd.DataFrame(new_data_dict)

# predict returns a one-element array, which is printed as-is.
predicted_diameter = model.predict(new_data)

print(F"Predicted Diameter for 4x2x0.25 is: {predicted_diameter}")
print("Actual Diameter for 4x2x0.25 is: 1.14")

Mean Squared Error: 0.01984620032143284
Model Coefficients: [ 0.06591919 -0.01293107  1.25289587  2.6724197 ]
    Actual  Predicted
40    0.65   0.599454
22    0.81   0.874570
55    0.60   0.676812
72    1.58   1.505260
0     0.75   0.729800
26    1.34   1.536745
39    0.83   0.874226
67    1.44   1.453420
10    0.60   0.676812
44    0.63   0.533535
83    1.52   1.615252
35    0.94   1.070836
90    2.30   2.127034
62    0.87   0.923502
12    0.58   0.624972
4     0.48   0.417267
18    0.94   0.991985
28    1.18   1.747090
49    0.80   1.056757
65    1.16   1.178648
15    1.04   0.943473
68    1.10   1.111237
78    1.04   0.943473
30    1.44   1.453420
33    0.67   0.678304
11    0.60   0.676812
66    1.35   1.374913
69    1.19   1.189743
Predicted Diameter for 4x2x0.25 is: [0.87606161]
Actual Diameter for 4x2x0.25 is: 1.14
