In [1]:
import pandas as pd

# Read the raw electronic-components table from the Kaggle input directory.
df = pd.read_csv(
    '/kaggle/input/electronic-components/electronic_components.csv'
)

# Step 1: count the null entries in each column before any cleaning is applied.
missing_before = df.isna().sum()
print("\nMissing Values Before Cleaning:")
print(missing_before)





Missing Values Before Cleaning:
Part Number             0
Voltage (V)             0
Current (A)             0
Compliance            164
Lifecycle Stage         0
Last Revision Date      0
dtype: int64


In [2]:
# Step 2: parse the 'Last Revision Date' column into datetime values.
# errors='coerce' turns any entry that cannot be parsed into NaT instead of raising.
revision_dates = pd.to_datetime(df['Last Revision Date'], errors='coerce')
df['Last Revision Date'] = revision_dates




In [3]:
# Step 3: force the measurement columns to numeric dtype, one column at a time.
# errors='coerce' replaces any value that is not a valid number with NaN.
numeric_columns = ['Voltage (V)', 'Current (A)']
for column in numeric_columns:
    df[column] = pd.to_numeric(df[column], errors='coerce')



In [4]:
# Step 4: keep the first occurrence of every fully identical row and discard
# the later repeats; surviving rows keep their original index labels.
df_cleaned = df[~df.duplicated(keep='first')]



In [5]:
# Step 5: re-count the null entries per column on the de-duplicated frame.
missing_after = df_cleaned.isna().sum()
print("\nMissing Values After Cleaning:")
print(missing_after)


Missing Values After Cleaning:
Part Number             0
Voltage (V)             0
Current (A)             0
Compliance            164
Lifecycle Stage         0
Last Revision Date      0
dtype: int64


In [7]:
# Step 6: write the cleaned frame to a CSV file for import into Power BI.
# index=False keeps the DataFrame index out of the file.
cleaned_file_path = "cleaned_electronic_components.csv"
df_cleaned.to_csv(path_or_buf=cleaned_file_path, index=False)

save_message = f"\n✅ Cleaned dataset saved as: {cleaned_file_path}"
print(save_message)






✅ Cleaned dataset saved as: cleaned_electronic_components.csv


In [8]:
# Summarize the cleaned frame: index range, per-column non-null counts,
# dtypes, and memory usage.
print("\nDataset Info After Cleaning:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() would append a stray "None" line.
df_cleaned.info()


Dataset Info After Cleaning:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Part Number         500 non-null    object        
 1   Voltage (V)         500 non-null    float64       
 2   Current (A)         500 non-null    float64       
 3   Compliance          336 non-null    object        
 4   Lifecycle Stage     500 non-null    object        
 5   Last Revision Date  500 non-null    datetime64[ns]
dtypes: datetime64[ns](1), float64(2), object(3)
memory usage: 23.6+ KB
None


In [9]:
# Preview the top five rows of the cleaned frame as plain text.
preview_rows = df_cleaned.head(5)
print("\nFirst 5 Rows of Cleaned Data:")
print(preview_rows)



First 5 Rows of Cleaned Data:
  Part Number  Voltage (V)  Current (A) Compliance Lifecycle Stage  \
0     PN-0000         5.62        3.494        NaN     End-of-Life   
1     PN-0001        11.50        2.685      REACH        Obsolete   
2     PN-0002         9.27        1.555        NaN     End-of-Life   
3     PN-0003         7.91        4.071        NaN          Active   
4     PN-0004         3.39        3.427       RoHS  Pre-Production   

  Last Revision Date  
0         2015-01-04  
1         2015-01-11  
2         2015-01-18  
3         2015-01-25  
4         2015-02-01  
