In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from joblib import dump, load

class ConcreteStrengthPredictor:
    """Linear-regression workflow for predicting concrete compressive strength.

    Intended call order: ``preprocess_data`` -> ``prepare_data`` ->
    ``train_model`` -> ``evaluate_model`` / ``cross_validation`` ->
    ``predict_strength``. ``save_model`` / ``load_model`` persist the fitted
    regressor (and, optionally, the fitted scaler) with joblib.
    """

    # Canonical column names assigned by ``preprocess_data``.
    # The eight features come first; the target is the last column.
    FEATURE_COLUMNS = (
        'Cement', 'Blast_Furnace_Slag', 'Fly_Ash', 'Water', 'Super_Plasticizer',
        'Coarse_Aggregate', 'Fine_Aggregate', 'Age_of_testing',
    )
    TARGET_COLUMN = 'Concrete_Compressive_strength'

    def __init__(self, data_url):
        """Load the dataset and create an unfitted scaler.

        Args:
            data_url: Anything ``pandas.read_csv`` accepts (local path, URL,
                file-like object), or an already-loaded ``DataFrame``. A
                DataFrame is copied so that the column renaming done by
                ``preprocess_data`` never touches the caller's object.
        """
        if isinstance(data_url, pd.DataFrame):
            self.data = data_url.copy()
        else:
            self.data = pd.read_csv(data_url)
        self.model = None
        self.scaler = StandardScaler()

    def preprocess_data(self):
        """Rename the columns positionally to the canonical names.

        Raises:
            ValueError: If the loaded data does not have exactly as many
                columns as there are canonical names (features + target).
        """
        expected = list(self.FEATURE_COLUMNS) + [self.TARGET_COLUMN]
        found = self.data.shape[1]
        if found != len(expected):
            # Fail with a message that names the real problem instead of
            # pandas' generic "Length mismatch" error.
            raise ValueError(
                "Expected {} columns (features + target) but the data has {}".format(
                    len(expected), found
                )
            )
        self.data.columns = expected

    def explore_data(self):
        """Print a quick overview: head, missing-value counts, target summary, age counts."""
        print(self.data.head())
        print(self.data.isna().sum())
        print(self.data[self.TARGET_COLUMN].describe())
        print(self.data['Age_of_testing'].value_counts())

    def prepare_data(self, test_size=0.4, random_state=42):
        """Split the data into train/test sets and standardize the features.

        Args:
            test_size: Forwarded to ``train_test_split`` (default 0.4).
            random_state: Seed forwarded to ``train_test_split`` (default 42).

        Returns:
            Tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)``. The
            scaled feature frames are rebuilt from arrays and therefore carry
            a fresh positional index, while ``y_train`` / ``y_test`` are
            single-column DataFrames that keep their original row labels.
        """
        X = self.data.drop(self.TARGET_COLUMN, axis=1)
        y = self.data[[self.TARGET_COLUMN]]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        # Fit the scaler on the training split only, so that no statistics
        # from the test split leak into the transformation.
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        self.X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
        self.X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)
        self.y_train = y_train
        self.y_test = y_test

        return self.X_train_scaled, self.X_test_scaled, self.y_train, self.y_test

    def train_model(self):
        """Fit a ``LinearRegression`` on the scaled training data from ``prepare_data``."""
        self.model = LinearRegression()
        self.model.fit(self.X_train_scaled, self.y_train)

    def evaluate_model(self):
        """Print the train score, test score and test mean squared error."""
        y_pred_test = self.model.predict(self.X_test_scaled)

        train_score = self.model.score(self.X_train_scaled, self.y_train)
        test_score = self.model.score(self.X_test_scaled, self.y_test)
        mse = mean_squared_error(self.y_test, y_pred_test)

        print("Train Score:", train_score)
        print("Test Score:", test_score)
        print("Mean Squared Error:", mse)

    def cross_validation(self, cv=10):
        """Cross-validate a fresh ``LinearRegression`` on the scaled training data.

        Args:
            cv: Cross-validation setting forwarded to ``cross_val_score``
                (default 10).
        """
        scores = cross_val_score(LinearRegression(), self.X_train_scaled, self.y_train, cv=cv)
        print("Cross Validation Scores:", scores)
        print("Standard Deviation:", scores.std())
        print("Mean Score:", scores.mean())

    def save_model(self, filename, scaler_filename=None):
        """Persist the trained model, and optionally the fitted scaler.

        Args:
            filename: Destination for the regressor (joblib format).
            scaler_filename: Optional destination for ``self.scaler``. Pass it
                when the model will be reloaded in a new session:
                ``predict_strength`` needs the scaler fitted on the training
                data, and the regressor file alone does not contain it.

        Raises:
            RuntimeError: If no model has been trained or loaded yet.
        """
        if self.model is None:
            raise RuntimeError(
                "No model to save; call train_model() or load_model() first"
            )
        dump(self.model, filename)
        if scaler_filename is not None:
            dump(self.scaler, scaler_filename)

    def load_model(self, filename, scaler_filename=None):
        """Load a model, and optionally a scaler, saved by ``save_model``.

        Args:
            filename: File holding the regressor.
            scaler_filename: Optional file holding the fitted scaler; when
                omitted the current ``self.scaler`` is kept.
        """
        # NOTE: joblib files are pickle-based and can execute arbitrary code
        # when loaded; only load files from a trusted source.
        self.model = load(filename)
        if scaler_filename is not None:
            self.scaler = load(scaler_filename)

    def predict_strength(self, X_new):
        """Scale new samples with the fitted scaler and predict their strength.

        Args:
            X_new: Feature values for the new samples. When it is a DataFrame
                holding exactly the columns the scaler was fitted on, the
                columns are put into the fitted order first, so callers need
                not match the training column order.

        Returns:
            Whatever ``self.model.predict`` returns for the scaled samples.
        """
        scaler_names = getattr(self.scaler, "feature_names_in_", None)
        if (
            isinstance(X_new, pd.DataFrame)
            and scaler_names is not None
            and len(X_new.columns) == len(scaler_names)
            and set(X_new.columns) == set(scaler_names)
        ):
            # Same columns, possibly in another order: align with the fit.
            # Any other mismatch is left for the scaler to report.
            X_new = X_new[list(scaler_names)]

        X_scaled = self.scaler.transform(X_new)

        # The model was fitted on a DataFrame; handing it a bare array makes
        # scikit-learn warn about missing feature names, so restore them.
        model_names = getattr(self.model, "feature_names_in_", None)
        if (
            model_names is not None
            and isinstance(X_scaled, np.ndarray)
            and X_scaled.ndim == 2
            and X_scaled.shape[1] == len(model_names)
        ):
            X_scaled = pd.DataFrame(
                X_scaled,
                columns=list(model_names),
                index=getattr(X_new, "index", None),
            )
        return self.model.predict(X_scaled)


# --- Load the dataset into a predictor ---
data_url = 'https://raw.githubusercontent.com/Percy-Agyei-Essiful/Compressive-Strength-of-Concrete/main/Compressive%20Strength.csv'
predictor = ConcreteStrengthPredictor(data_url)

# --- Give the columns their canonical names ---
predictor.preprocess_data()

# --- Train/test split plus feature standardization ---
(X_train_scaled, X_test_scaled, y_train, y_test) = predictor.prepare_data()

# --- Fit the regressor, then report hold-out and cross-validated scores ---
predictor.train_model()
predictor.evaluate_model()
predictor.cross_validation()

# --- Round-trip the fitted model through disk ---
model_filename = 'concrete_strength_model.joblib'
predictor.save_model(model_filename)
predictor.load_model(model_filename)

# --- Score two unseen mixes (one dict per sample, keys in training column order) ---
new_samples = pd.DataFrame([
    {
        'Cement': 336,
        'Blast_Furnace_Slag': 0,
        'Fly_Ash': 0,
        'Water': 182,
        'Super_Plasticizer': 3,
        'Coarse_Aggregate': 986,
        'Fine_Aggregate': 817,
        'Age_of_testing': 28,
    },
    {
        'Cement': 160.1,
        'Blast_Furnace_Slag': 5.2,
        'Fly_Ash': 315.9,
        'Water': 143.9,
        'Super_Plasticizer': 7.7,
        'Coarse_Aggregate': 2049.5,
        'Fine_Aggregate': 810.1,
        'Age_of_testing': 28,
    },
])

predictions = predictor.predict_strength(new_samples)

# Show the predicted compressive strengths
print(predictions)

Train Score: 0.6075869028874532
Test Score: 0.6192613019864958
Mean Squared Error: 96.8857952884701
Cross Validation Scores: [0.627775   0.62513767 0.67517291 0.61633497 0.51908919 0.61485093
 0.45754952 0.46982251 0.63452903 0.63330923]
Standard Deviation: 0.07213175962734932
Mean Score: 0.5873570940800538
[[28.07717138]
 [64.23451823]]


