# Predicting Concrete Compressive Strength Based on Mixture Parameters Using Machine Learning Algorithms

Concrete is the most important material in civil engineering. The concrete compressive strength is a highly nonlinear function of age and ingredients. Given the components, predict the strength of the mixture.

The data is provided in .csv format.

### File descriptions
- `train_data.csv` - the training set
- `test_data.csv` - the test set
- `sample.csv` - a sample submission file in the correct format
- `meta.txt` - supplemental information about the data
### Data fields
- `cement` - Cement (component 1) (kg in a m^3 mixture)
- `blast` - Blast Furnace Slag (component 2) (kg in a m^3 mixture)
- `flyash` - Fly Ash (component 3) (kg in a m^3 mixture)
- `water` - Water (component 4) (kg in a m^3 mixture)
- `sp` - Superplasticizer (component 5) (kg in a m^3 mixture)
- `ca` - Coarse Aggregate (component 6) (kg in a m^3 mixture)
- `fa` - Fine Aggregate (component 7) (kg in a m^3 mixture)
- `age` - Age (days)
- `strength` - Concrete compressive strength (MPa, megapascals)


In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.ensemble import RandomForestRegressor  as RFR

In [None]:
# list(frange(0.0,1.0,0.15))

In [None]:
# Load the training and test tables from the local dataset/ folder.
train = pd.read_csv('dataset/train_data2.csv')
test = pd.read_csv('dataset/test_data.csv')

In [None]:
# Notebook display: show the training DataFrame for a quick visual inspection.
train

In [None]:
# Notebook display: show the test DataFrame for a quick visual inspection.
test

In [None]:
# Regression target: the 'strength' column of the training frame.
y = train.loc[:, 'strength']
y

In [None]:
# Feature matrix: every training column except the target. The target is
# dropped by name rather than by slicing off the last column positionally, so
# 'strength' can never leak into the features (and no real feature is lost)
# if the CSV's column order differs from the expected one.
x = train.drop(columns='strength')
x

In [None]:
# Hold out 20% of the training rows for validation. random_state is fixed so
# the split, and every score computed from it, is reproducible across runs.
xtr, xte, ytr, yte = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
# Sanity check: shapes of the train features/targets and held-out features/targets.
print(*(part.shape for part in (xtr, ytr, xte, yte)))

In [None]:
from math import sqrt


def score(y_actual, y_predicted):
    """Return the root-mean-squared error between actual and predicted values.

    The value is the square root of ``mean_squared_error(y_actual, y_predicted)``.
    """
    mse = mean_squared_error(y_actual, y_predicted)
    return sqrt(mse)

In [None]:
# Random forest regressor: 150 trees, at least 4 samples per leaf, and
# max_features=0.75. random_state is fixed (the commented-out sweep in this
# notebook uses the same seed) so repeated runs train the same forest and
# produce comparable scores.
model = RFR(
    min_samples_leaf=4,
    max_features=0.75,
    n_estimators=150,
    random_state=42,
)
model.fit(xtr, ytr)

In [None]:
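# Disabled hyper-parameter sweep: tries max_features = i/200 for i in 15, 30, ..., 195
# (0.075 to 0.975) and prints train RMSE, held-out RMSE and their difference.
# for i in range(15,200,15):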
#     model = RFR(min_samples_leaf=4, max_features= i/200, n_estimators= 100, random_state= 42)
#     model.fit(xtr,ytr)
#     ytr_pre = np.round(model.predict(xtr), 2)
#     yte_pre = np.round(model.predict(xte), 2)
#     ytr_pre_score = score(ytr, ytr_pre)
#     yte_pre_score = score(yte, yte_pre)
#     print(ytr_pre_score , yte_pre_score, ytr_pre_score-yte_pre_score)

In [None]:
# Predict on both splits (rounded to 3 decimals), then report the RMSE on the
# training rows, the RMSE on the held-out rows, and the gap between the two.
ytr_pre = model.predict(xtr).round(3)
yte_pre = model.predict(xte).round(3)
ytr_pre_score, yte_pre_score = score(ytr, ytr_pre), score(yte, yte_pre)
print(ytr_pre_score, yte_pre_score, ytr_pre_score - yte_pre_score)

In [None]:
# Predict on the test table, round to 2 decimals, echo the values, and save
# them to samout.csv under a single 'predicted' column.
out = model.predict(test).round(2)
print(out)
samout = pd.DataFrame(data={'predicted': out})
samout.to_csv('samout.csv')