### Step - 1
```cpp
    Import the necessary libraries
```

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

### Step - 2
```cpp
    Load the data in a data frame
```

In [2]:
file_path = 'Heart.csv'
df = pd.read_csv(file_path)

### Step - 3
```cpp
    Data Inspection
```

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  303 non-null    int64  
 1   Age         303 non-null    int64  
 2   Sex         303 non-null    int64  
 3   ChestPain   303 non-null    object 
 4   RestBP      303 non-null    int64  
 5   Chol        303 non-null    int64  
 6   Fbs         303 non-null    int64  
 7   RestECG     303 non-null    int64  
 8   MaxHR       303 non-null    int64  
 9   ExAng       303 non-null    int64  
 10  Oldpeak     303 non-null    float64
 11  Slope       303 non-null    int64  
 12  Ca          299 non-null    float64
 13  Thal        301 non-null    object 
 14  AHD         303 non-null    object 
dtypes: float64(2), int64(10), object(3)
memory usage: 35.6+ KB


In [4]:
df.describe()

Unnamed: 0.1,Unnamed: 0,Age,Sex,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,299.0
mean,152.0,54.438944,0.679868,131.689769,246.693069,0.148515,0.990099,149.607261,0.326733,1.039604,1.60066,0.672241
std,87.612784,9.038662,0.467299,17.599748,51.776918,0.356198,0.994971,22.875003,0.469794,1.161075,0.616226,0.937438
min,1.0,29.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,1.0,0.0
25%,76.5,48.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0
50%,152.0,56.0,1.0,130.0,241.0,0.0,1.0,153.0,0.0,0.8,2.0,0.0
75%,227.5,61.0,1.0,140.0,275.0,0.0,2.0,166.0,1.0,1.6,2.0,1.0
max,303.0,77.0,1.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,3.0,3.0


In [5]:
df.head()

Unnamed: 0.1,Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD
0,1,63,1,typical,145,233,1,2,150,0,2.3,3,0.0,fixed,No
1,2,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3.0,normal,Yes
2,3,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2.0,reversable,Yes
3,4,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0.0,normal,No
4,5,41,0,nontypical,130,204,0,2,172,0,1.4,1,0.0,normal,No


### Step - 4
```cpp
    Converting data to other file types like .xlsx, .json and .sql    
```

In [6]:
df.to_excel('dataset.xlsx', index = False)

In [7]:
df.to_json('dataset.json', orient = 'records', indent = 2)

In [8]:
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
df.to_sql('dataset', con = engine, index = False, if_exists = 'replace')

303

### Step - 5
```cpp
    Load and verify the converted data
```

In [9]:
df_excel = pd.read_excel('dataset.xlsx')

In [10]:
df_json = pd.read_json('dataset.json')

In [11]:
df_sql = pd.read_sql('dataset', con = engine)

### Step - 6
```cpp
    Data Cleaning
```

In [12]:
df.ffill(inplace = True)

In [13]:
df.drop_duplicates(inplace = True)

In [14]:
scaler = MinMaxScaler()
numerical_cols = df.select_dtypes(include = ['number']).columns
df = pd.get_dummies(df, drop_first = True)
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

In [15]:
df.head()

Unnamed: 0.1,Unnamed: 0,Age,Sex,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,ChestPain_nonanginal,ChestPain_nontypical,ChestPain_typical,Thal_normal,Thal_reversable,AHD_Yes
0,0.0,0.708333,1.0,0.481132,0.244292,1.0,1.0,0.603053,0.0,0.370968,1.0,0.0,False,False,True,False,False,False
1,0.003311,0.791667,1.0,0.622642,0.365297,0.0,1.0,0.282443,1.0,0.241935,0.5,1.0,False,False,False,True,False,True
2,0.006623,0.791667,1.0,0.245283,0.23516,0.0,1.0,0.442748,1.0,0.419355,0.5,0.666667,False,False,False,False,True,True
3,0.009934,0.166667,1.0,0.339623,0.283105,0.0,0.0,0.885496,0.0,0.564516,1.0,0.0,True,False,False,True,False,False
4,0.013245,0.25,0.0,0.339623,0.178082,0.0,1.0,0.770992,0.0,0.225806,0.0,0.0,False,True,False,True,False,False


### Step - 7
```cpp
    Export the cleaned data
```

In [16]:
df.to_csv('Cleaned_Data_V1.csv', index = False)