In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
# Walk the Kaggle input mount and print the full path of every file found there.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

In [None]:
def plot_grid(data, fig_size, grid_size, plot_type, target = ''):
    """Draw one seaborn plot per column of ``data`` on a shared subplot grid.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are plotted.
    fig_size : tuple
        Forwarded to ``plt.figure(figsize=...)``.
    grid_size : sequence
        ``grid_size[0]`` rows by ``grid_size[1]`` columns of subplots. Plots
        are placed in row-major order starting at slot 1.
    plot_type : {'histplot', 'boxplot', 'countplot'}
        * ``'histplot'`` - count histogram with KDE for every column whose
          dtype is not ``object``.
        * ``'boxplot'`` - horizontal box plot for every column whose dtype is
          not ``object``.
        * ``'countplot'`` - count plot for every column except ``target``,
          split by the values of ``target``.
    target : str, optional
        Name of the column used as ``hue``; only read for ``'countplot'``.

    Raises
    ------
    ValueError
        If ``plot_type`` is not one of the supported names, or if the grid
        has fewer slots than there are columns to plot.
    KeyError
        If ``plot_type`` is ``'countplot'`` and ``target`` is not a column of
        ``data``.
    """
    supported = ('histplot', 'boxplot', 'countplot')
    if plot_type not in supported:
        # Previously an unknown type silently produced an empty figure.
        raise ValueError(f"plot_type must be one of {supported}, got {plot_type!r}")

    if plot_type == 'countplot':
        if target not in data.columns:
            raise KeyError(f"target column {target!r} not found in data")
        hue = data[target]
        columns = data.drop(hue.name, axis = 1).columns
    else:
        hue = None
        columns = data.select_dtypes(exclude = 'object').columns

    n_rows, n_cols = grid_size[0], grid_size[1]
    if len(columns) > n_rows * n_cols:
        raise ValueError(
            f"grid_size {n_rows}x{n_cols} has {n_rows * n_cols} slots "
            f"but {len(columns)} columns need plotting"
        )

    # All validation happens before this point so a bad call leaves no stray figure behind.
    fig = plt.figure(figsize = fig_size)
    for i, column_name in enumerate(columns):
        ax = fig.add_subplot(n_rows, n_cols, i + 1)
        # Pass ax explicitly instead of relying on pyplot's "current axes" state.
        if plot_type == 'histplot':
            sns.histplot(data[column_name], kde = True, color = 'royalblue', stat = 'count', ax = ax)
        elif plot_type == 'boxplot':
            sns.boxplot(x = data[column_name], color = 'royalblue', ax = ax)
        else:
            sns.countplot(x = data[column_name], hue = hue, palette = 'Blues_r', ax = ax)
            ax.legend(loc = 'upper right', title = hue.name)
    fig.tight_layout()

In [None]:
! pip install xlrd

In [None]:
# Read worksheet 'Sheet1' of the Kaggle-hosted workbook and preview the first rows.
df = pd.read_excel(
    io = '/kaggle/input/concrete-comprehensive-strength/Concrete_Data.xls',
    sheet_name = 'Sheet1',
)
df.head()

In [None]:
# Snapshot of the frame as loaded. copy must be *called*; without the parentheses
# df_old would hold the bound method object rather than an independent DataFrame.
df_old = df.copy()

In [None]:
# Show the column labels exactly as they were read from the workbook.
df.columns

In [None]:
# Shorten every column label to the text before its first '(' (whitespace trimmed).
# A single rename with a complete mapping replaces the per-column loop, which
# rebuilt the whole frame on every iteration.
cols_name = df.columns
df = df.rename(columns = {col: col.split('(')[0].strip() for col in cols_name})

In [None]:
# Display the frame (pandas truncates long frames in its rich output).
df

* Cement - **Цемент**
* Blast Furnace Slag - **Доменный шлак**
* Fly Ash - **Зола-унос**
* Water - **Вода**
* Superplasticizer - **Суперпластификатор**
* Coarse Aggregate - **Крупный заполнитель**
* Fine Aggregate - **Мелкий заполнитель**
* Age - **Возраст бетона** (в днях)
* Concrete compressive strength - **Прочность бетона на сжатие**

In [None]:
# Summary statistics for the columns of df.
df.describe()

In [None]:
# Histogram + KDE of every input feature (the target column is dropped first).
# Assumes the nine columns listed in the notebook's glossary, i.e. 8 features:
# a 2 x 4 grid fits them exactly, whereas the former 14 x 16 grid (224 slots)
# shrank each plot to a thumbnail on a mostly empty canvas.
plot_grid(df.drop('Concrete compressive strength', axis = 1), (24, 10), (2, 4), 'histplot')

In [None]:
# Show the current column labels of df.
df.columns

In [None]:
# Count histogram of the 'Fly Ash' column with a KDE curve on top.
sns.histplot(
    data = df,
    x = 'Fly Ash',
    stat = 'count',
    kde = True,
    color = 'royalblue',
)

In [None]:
# Natural log of the strictly positive 'Fly Ash' values (rows with 0 are left out),
# then the same count histogram + KDE on the transformed values.
y_log = np.log(df.loc[df['Fly Ash'] > 0, 'Fly Ash'])
sns.histplot(
    x = y_log,
    stat = 'count',
    kde = True,
    color = 'royalblue',
)

In [None]:
# Box plot of every input feature (the target column is dropped first).
# Assumes the nine columns listed in the notebook's glossary, i.e. 8 features:
# a 2 x 4 grid fits them exactly, whereas the former 12 x 14 grid (168 slots)
# shrank each plot to a thumbnail on a mostly empty canvas.
plot_grid(df.drop('Concrete compressive strength', axis = 1), (24, 10), (2, 4), 'boxplot')

In [None]:
# Pairwise correlation heatmap of df, showing only the cells below the diagonal.
corr = df.corr()  # computed once; previously evaluated twice
# Boolean mask covering the diagonal and everything above it. It is built from
# ones rather than from the correlation values, so an upper-triangle coefficient
# that happens to be exactly 0 is hidden as well.
upper_mask = np.triu(np.ones_like(corr, dtype = bool))
plt.figure(figsize = (16, 6))
sns.heatmap(corr,
            annot = True,
            fmt = '.2f',
            square = True,
            cmap = "magma",
            mask = upper_mask)

In [None]:
# Count histogram of the 'Water' column with a KDE curve on top.
sns.histplot(
    data = df,
    x = 'Water',
    stat = 'count',
    kde = True,
    color = 'royalblue',
)

In [None]:
# Same histogram after taking the natural log of every 'Water' value.
sns.histplot(
    x = df['Water'].apply(np.log),
    stat = 'count',
    kde = True,
    color = 'royalblue',
)

In [None]:
# Pairwise correlation heatmap of df, showing only the cells below the diagonal.
corr = df.corr()  # computed once; previously evaluated twice
# Boolean mask covering the diagonal and everything above it. It is built from
# ones rather than from the correlation values, so an upper-triangle coefficient
# that happens to be exactly 0 is hidden as well.
upper_mask = np.triu(np.ones_like(corr, dtype = bool))
plt.figure(figsize = (16, 6))
sns.heatmap(corr,
            annot = True,
            fmt = '.2f',
            square = True,
            cmap = "magma",
            mask = upper_mask)

In [None]:
# Number of rows currently in df.
len(df)

In [None]:
# Keep only rows whose 'Water' value lies strictly between 130 and 230
# (presumably trimming the extremes seen in the earlier box plots - confirm the thresholds).
df = df[(df['Water'] > 130) & (df['Water'] < 230)]
# Pass the series as x= so the box is horizontal regardless of seaborn version
# (a positional argument is interpreted differently across releases), and use a
# plain color because palette without hue is deprecated in seaborn 0.13.
box = sns.boxplot(x = df['Water'], color = 'royalblue')

In [None]:
# Number of rows currently in df.
len(df)

In [None]:
# Pairwise correlation heatmap of the current df, showing only the cells below the diagonal.
corr = df.corr()  # computed once; previously evaluated twice
# Boolean mask covering the diagonal and everything above it. It is built from
# ones rather than from the correlation values, so an upper-triangle coefficient
# that happens to be exactly 0 is hidden as well.
upper_mask = np.triu(np.ones_like(corr, dtype = bool))
plt.figure(figsize = (16, 6))
sns.heatmap(corr,
            annot = True,
            fmt = '.2f',
            square = True,
            cmap = "magma",
            mask = upper_mask)