**1. Import Necessary Libraries**

In [11]:
import pandas as pd
import numpy as np

**2. Read the Data into Python**

In [None]:
# Load the raw NIFTY export into a DataFrame and preview the first five rows.
raw_csv_path = 'nifty_ds.csv'
df = pd.read_csv(raw_csv_path)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Shares Traded,Turnover (₹ Cr)
0,12-AUG-2024,24320.05,24472.8,24212.1,24347.0,279925100,30311.85
1,13-AUG-2024,24342.35,24359.95,24116.5,24139.0,239727640,25459.58
2,14-AUG-2024,24184.4,24196.5,24099.7,24143.75,303254705,27834.61
3,16-AUG-2024,24334.85,24563.9,24204.5,24541.15,271611087,28521.9
4,19-AUG-2024,24636.35,24638.8,24522.95,24572.65,243645503,22124.41


**3. Check for Missing Values**

In [13]:
# Count the missing entries in each column (isna is an alias of isnull).
missing_per_column = df.isna().sum()
print(missing_per_column)

Date               0
Open               0
High               0
Low                0
Close              0
Shares Traded      0
Turnover (₹ Cr)    0
dtype: int64


**4. Check for any duplicate entries**

In [14]:
# Count the rows flagged by duplicated(), i.e. full-row repeats.
duplicate_row_count = df.duplicated().sum()
print(duplicate_row_count)

0


**5. Dealing with Inconsistent Data**

In [15]:
# Restrict the check to numeric columns; the sign test is only applied to them.
numerical_cols = df.select_dtypes(include=np.number).columns
print(numerical_cols)

# Track whether any column failed so the clean case can be reported explicitly.
found_negative = False

for col in numerical_cols:
    invalid = df[df[col] < 0]

    if invalid.empty:
        continue

    found_negative = True
    print(f"Invalid (negative) values found in '{col}':\n", invalid)
    print("-" * 30)

# Without this message a clean dataset produces no output at all, which is
# indistinguishable from the check never having run.
if not found_negative:
    print("No negative values found in any numerical column.")

Index(['Open ', 'High ', 'Low ', 'Close ', 'Shares Traded ',
       'Turnover (₹ Cr)'],
      dtype='object')


**6. Stripping trailing spaces in column names**

In [16]:
# Remove leading/trailing whitespace from every column label, then show
# the resulting labels.
stripped_labels = df.columns.str.strip()
df.columns = stripped_labels
print(df.columns)

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Shares Traded',
       'Turnover (₹ Cr)'],
      dtype='object')


**7. Check for Data Types**

In [17]:
# info() writes its report to stdout itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249 entries, 0 to 248
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Date             249 non-null    object 
 1   Open             249 non-null    float64
 2   High             249 non-null    float64
 3   Low              249 non-null    float64
 4   Close            249 non-null    float64
 5   Shares Traded    249 non-null    int64  
 6   Turnover (₹ Cr)  249 non-null    float64
dtypes: float64(5), int64(1), object(1)
memory usage: 13.7+ KB
None


**8. Convert Date to an Appropriate Date Type**

In [18]:
# Parse the day-month-year strings (e.g. 12-AUG-2024) into datetime values.
df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%Y')

# info() writes its report to stdout itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249 entries, 0 to 248
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Date             249 non-null    datetime64[ns]
 1   Open             249 non-null    float64       
 2   High             249 non-null    float64       
 3   Low              249 non-null    float64       
 4   Close            249 non-null    float64       
 5   Shares Traded    249 non-null    int64         
 6   Turnover (₹ Cr)  249 non-null    float64       
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 13.7 KB
None


In [19]:
# Replace the rupee sign in the turnover column label with plain "Rs",
# then preview the first five rows.
column_renames = {'Turnover (₹ Cr)': 'Turnover (Cr Rs)'}
df = df.rename(columns=column_renames)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Shares Traded,Turnover (Cr Rs)
0,2024-08-12,24320.05,24472.8,24212.1,24347.0,279925100,30311.85
1,2024-08-13,24342.35,24359.95,24116.5,24139.0,239727640,25459.58
2,2024-08-14,24184.4,24196.5,24099.7,24143.75,303254705,27834.61
3,2024-08-16,24334.85,24563.9,24204.5,24541.15,271611087,28521.9
4,2024-08-19,24636.35,24638.8,24522.95,24572.65,243645503,22124.41


**9. Save Cleaned Data**

In [None]:
# Write the cleaned frame to disk; index=False keeps the row index out of the file.
cleaned_csv_path = 'cleaned_nifty_ds.csv'
df.to_csv(cleaned_csv_path, index=False)

**Conclusion:**

- No missing values.
- No duplicate records.
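- No negative values in any numerical attribute.
- Removed trailing spaces from column names.
- Renamed the `Turnover (₹ Cr)` column to `Turnover (Cr Rs)`.
- Converted `Date` from text to a datetime type; the numeric attributes already had appropriate types.
- Saved the cleaned data to `cleaned_nifty_ds.csv`.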