<a href="https://colab.research.google.com/github/Gressling/digitalchemistry.org/blob/main/batch_analysis/Correlation_Between_Temperature%2C_pH%2C_and_Yield.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# go

In [2]:
import sqlite3
import random
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
import plotly.graph_objects as go
import pandas as pd

# Create and set up the databases.
# Two on-disk SQLite files are opened: one holding the knowledge graph
# (the reaction catalogue) and one holding the execution layer (per-batch data).
conn_kg = sqlite3.connect('knowledge_graph.db')
cursor_kg = conn_kg.cursor()
conn_exec = sqlite3.connect('execution_layer.db')
cursor_exec = conn_exec.cursor()

# Create tables for Knowledge Graph
cursor_kg.execute("CREATE TABLE IF NOT EXISTS reactions (id INTEGER PRIMARY KEY, name TEXT)")
# Insert the reaction only when it is not already present: the database file
# persists between runs, so an unconditional INSERT would add one more
# duplicate row every time this cell is executed again.
cursor_kg.execute(
    "INSERT INTO reactions (name) "
    "SELECT 'Nitration of Benzene' "
    "WHERE NOT EXISTS (SELECT 1 FROM reactions WHERE name = 'Nitration of Benzene')"
)
conn_kg.commit()

# Create tables for Execution Layer
# batches: one row per production batch with its timestamp and measured yield.
cursor_exec.execute("CREATE TABLE IF NOT EXISTS batches (id INTEGER PRIMARY KEY, date_time TEXT, actual_yield REAL)")
# batch_parameters: process settings recorded for a batch (linked via batch_id).
cursor_exec.execute("CREATE TABLE IF NOT EXISTS batch_parameters (id INTEGER PRIMARY KEY, batch_id INTEGER, temperature REAL, pressure REAL, pH REAL, mixing_speed REAL, FOREIGN KEY (batch_id) REFERENCES batches (id))")
# batch_quality_attributes: quality results recorded for a batch (linked via batch_id).
cursor_exec.execute("CREATE TABLE IF NOT EXISTS batch_quality_attributes (id INTEGER PRIMARY KEY, batch_id INTEGER, purity REAL, potency TEXT, stability TEXT, FOREIGN KEY (batch_id) REFERENCES batches (id))")

# Function to calculate actual yield based on temperature and pH
def calculate_yield(temperature, pH):
    """Return the simulated batch yield for a given temperature and pH.

    The model is linear around a reference point of temperature 55 and
    pH 2.5, where the yield is 90: each unit of temperature above the
    reference adds 0.5, and each unit of pH above the reference removes 1.5.
    The result is clamped to the range 70..100.

    Args:
        temperature: Process temperature (plotted elsewhere in this file as °C).
        pH: Process pH.

    Returns:
        The clamped yield (plotted elsewhere in this file as a percentage).
    """
    temperature_term = 0.5 * (temperature - 55)
    acidity_term = -1.5 * (pH - 2.5)
    unclamped = 90 + (temperature_term + acidity_term)

    # Clamp to the allowed band with explicit guards.
    if unclamped < 70:
        return 70
    if unclamped > 100:
        return 100
    return unclamped

# Insert ten synthetic batches with their parameters and quality attributes.
# Temperature and pH are drawn uniformly around the reference point
# (55 +/- 3 and 2.5 +/- 0.5) and the yield is derived from them.
for batch_id in range(1, 11):
    temperature = 55 + random.uniform(-3, 3)
    pH = 2.5 + random.uniform(-0.5, 0.5)
    actual_yield = calculate_yield(temperature, pH)
    # The loop counter only drives the synthetic timestamp (2023-08-11 .. 2023-08-20).
    date_time = f'2023-08-{10 + batch_id} 10:00:00'

    cursor_exec.execute("INSERT INTO batches (date_time, actual_yield) VALUES (?, ?)", (date_time, actual_yield))
    # Link the child rows to the id SQLite actually assigned to this batch.
    # The database file persists between runs, so on a re-run the new batches
    # get ids 11, 12, ...; using the loop counter (1..10) as the foreign key
    # would attach the new parameters to old batches and corrupt the JOIN below.
    batch_row_id = cursor_exec.lastrowid
    cursor_exec.execute("INSERT INTO batch_parameters (batch_id, temperature, pressure, pH, mixing_speed) VALUES (?, ?, 1.5, ?, 250)", (batch_row_id, temperature, pH))
    cursor_exec.execute("INSERT INTO batch_quality_attributes (batch_id, purity, potency, stability) VALUES (?, 98.5, 'High', 'Stable')", (batch_row_id,))

# Persist all inserted rows in one transaction.
conn_exec.commit()

# Load (temperature, pH, yield) rows by joining every batch with its
# parameter record, then reshape them for model fitting.
query = "SELECT bp.temperature, bp.pH, b.actual_yield FROM batches AS b JOIN batch_parameters AS bp ON b.id = bp.batch_id"
batch_data = cursor_exec.execute(query).fetchall()

# Transpose the list of row tuples into one tuple per column.
temperatures, pHs, actual_yields = zip(*batch_data)

# Feature matrix with one row per batch and columns (temperature, pH);
# target vector holds the matching yields.
X = np.stack((temperatures, pHs), axis=1)
y = np.asarray(actual_yields)

# Fit a degree-2 polynomial regression of yield on (temperature, pH):
# the features are expanded to second-order terms and fed to ordinary
# least squares.
quadratic_features = PolynomialFeatures(degree=2)
least_squares = LinearRegression()
polynomial_model = make_pipeline(quadratic_features, least_squares).fit(X, y)

# Build a 100 x 100 grid spanning the observed temperature and pH ranges
# so the fitted response can be drawn as a surface.
temp_range = np.linspace(min(temperatures), max(temperatures), num=100)
pH_range = np.linspace(min(pHs), max(pHs), num=100)
temp_mesh, pH_mesh = np.meshgrid(temp_range, pH_range)

# Flatten the grid into (temperature, pH) rows, the layout the model expects.
X_mesh = np.c_[temp_mesh.ravel(), pH_mesh.ravel()]

# Evaluate the model on every grid point and fold the flat predictions
# back into the grid's shape.
flat_predictions = polynomial_model.predict(X_mesh)
predicted_yield_mesh = flat_predictions.reshape(temp_mesh.shape)

# Interactive 3D view with Plotly: measured batches as markers, with the
# fitted polynomial response drawn as a semi-transparent surface.
batch_data_df = pd.DataFrame({'Temperature (°C)': temperatures, 'pH': pHs, 'Actual Yield (%)': actual_yields})

measured_points = go.Scatter3d(
    x=temperatures,
    y=pHs,
    z=actual_yields,
    mode='markers',
    name='Actual Yield',
)
fitted_surface = go.Surface(
    x=temp_mesh,
    y=pH_mesh,
    z=predicted_yield_mesh,
    opacity=0.5,
    name='Predicted Yield',
)

# Traces are drawn in list order: markers first, then the surface.
fig = go.Figure(data=[measured_points, fitted_surface])
fig.update_layout(
    title='Correlation Between Temperature, pH, and Yield',
    scene=dict(
        xaxis_title='Temperature (°C)',
        yaxis_title='pH',
        zaxis_title='Actual Yield (%)',
    ),
)
fig.show()
