## Adding 10 Rows to the Dataset Using a Data Augmentation Technique

In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np

In [2]:
# Read the raw experimental measurements from CSV and print a structural
# summary (row count, column names, non-null counts, dtypes).
df = pd.read_csv(
    filepath_or_buffer="Material Compressive Strength Experimental Data.csv"
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6139 entries, 0 to 6138
Data columns (total 9 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Material Quantity (gm)      6030 non-null   float64
 1   Additive Catalyst (gm)      6030 non-null   float64
 2   Ash Component (gm)          6030 non-null   float64
 3   Water Mix (ml)              6030 non-null   float64
 4   Plasticizer (gm)            6030 non-null   float64
 5   Moderate Aggregator         6030 non-null   float64
 6   Refined Aggregator          6030 non-null   float64
 7   Formulation Duration (hrs)  6030 non-null   float64
 8   Compression Strength MPa    6139 non-null   float64
dtypes: float64(9)
memory usage: 431.8 KB


In [3]:
# Count the missing values in each column (isna is the alias of isnull).
df.isna().sum()

Material Quantity (gm)        109
Additive Catalyst (gm)        109
Ash Component (gm)            109
Water Mix (ml)                109
Plasticizer (gm)              109
Moderate Aggregator           109
Refined Aggregator            109
Formulation Duration (hrs)    109
Compression Strength MPa        0
dtype: int64

In [4]:
# Drop, in place, every row whose "Material Quantity (gm)" is missing,
# then re-count the nulls per column to confirm the cleanup.
df.dropna(
    axis="index",
    how="any",
    subset=["Material Quantity (gm)"],
    inplace=True,
)
df.isna().sum()

Material Quantity (gm)        0
Additive Catalyst (gm)        0
Ash Component (gm)            0
Water Mix (ml)                0
Plasticizer (gm)              0
Moderate Aggregator           0
Refined Aggregator            0
Formulation Duration (hrs)    0
Compression Strength MPa      0
dtype: int64

In [5]:
# Re-inspect the frame after dropping rows: the index is no longer a
# contiguous RangeIndex because the removed rows leave gaps in the labels.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6030 entries, 0 to 6138
Data columns (total 9 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Material Quantity (gm)      6030 non-null   float64
 1   Additive Catalyst (gm)      6030 non-null   float64
 2   Ash Component (gm)          6030 non-null   float64
 3   Water Mix (ml)              6030 non-null   float64
 4   Plasticizer (gm)            6030 non-null   float64
 5   Moderate Aggregator         6030 non-null   float64
 6   Refined Aggregator          6030 non-null   float64
 7   Formulation Duration (hrs)  6030 non-null   float64
 8   Compression Strength MPa    6030 non-null   float64
dtypes: float64(9)
memory usage: 471.1 KB


In [6]:
# Set up the augmentation step that adds synthetic rows to the dataset.

# Number of synthetic rows to generate.
extra_rows = 10

# Empty DataFrame that will hold the generated (augmented) rows.
augmented_df = pd.DataFrame()


In [7]:
# Generate `extra_rows` synthetic rows by jittering randomly chosen existing rows.
#
# A seeded generator makes the augmentation reproducible on
# "Restart Kernel -> Run All"; change the seed to obtain a different sample.
rng = np.random.default_rng(42)

# Standard deviation of the zero-mean Gaussian noise added to every value.
# NOTE(review): the same absolute noise scale is applied to all columns,
# including the "Compression Strength MPa" target, regardless of each
# column's range. It may also push values that are at or near zero slightly
# negative -- confirm this is acceptable for these measurements.
noise_magnitude = 0.1

# Sample source rows by index label, with replacement (the same row may be
# picked more than once, as when drawing one row at a time).
sampled_index = rng.choice(df.index, size=extra_rows)
sampled_rows = df.loc[sampled_index].reset_index(drop=True)

# Draw one independent noise value per cell and add it in a single vectorized
# step instead of growing the DataFrame row by row with pd.concat.
noise = rng.normal(0, noise_magnitude, size=sampled_rows.shape)
augmented_df = sampled_rows + noise

augmented_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Material Quantity (gm)      10 non-null     float64
 1   Additive Catalyst (gm)      10 non-null     float64
 2   Ash Component (gm)          10 non-null     float64
 3   Water Mix (ml)              10 non-null     float64
 4   Plasticizer (gm)            10 non-null     float64
 5   Moderate Aggregator         10 non-null     float64
 6   Refined Aggregator          10 non-null     float64
 7   Formulation Duration (hrs)  10 non-null     float64
 8   Compression Strength MPa    10 non-null     float64
dtypes: float64(9)
memory usage: 848.0 bytes


In [10]:
# Stack the cleaned original rows on top of the synthetic rows and renumber
# the index from 0 so the combined frame has a contiguous RangeIndex.
augmented_dataset = pd.concat(
    objs=[df, augmented_df],
    axis=0,
    ignore_index=True,
)

augmented_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6040 entries, 0 to 6039
Data columns (total 9 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Material Quantity (gm)      6040 non-null   float64
 1   Additive Catalyst (gm)      6040 non-null   float64
 2   Ash Component (gm)          6040 non-null   float64
 3   Water Mix (ml)              6040 non-null   float64
 4   Plasticizer (gm)            6040 non-null   float64
 5   Moderate Aggregator         6040 non-null   float64
 6   Refined Aggregator          6040 non-null   float64
 7   Formulation Duration (hrs)  6040 non-null   float64
 8   Compression Strength MPa    6040 non-null   float64
dtypes: float64(9)
memory usage: 424.8 KB


In [11]:
# Write the combined dataset to disk without the index column.
augmented_dataset.to_csv(path_or_buf="Augmented_CementData.csv", index=False)