In [6]:
import pandas as pd

# Read the raw exchange-rate table from disk. The file is decoded as
# ISO-8859-1 (Latin-1) rather than pandas' default UTF-8.
exchange_rates_df = pd.read_csv(
    'exchange_rates.csv',
    encoding='ISO-8859-1',
)

# Preview the leading rows to confirm the columns parsed as expected
print(exchange_rates_df.head(n=5))


       Date Currency  Exchange
0  1/1/2015      USD    1.0000
1  1/1/2015      CAD    1.1583
2  1/1/2015      AUD    1.2214
3  1/1/2015      EUR    0.8237
4  1/1/2015      GBP    0.6415


In [7]:
# Count the null entries in every column (one total per column)
print(exchange_rates_df.isna().sum())


Date        0
Currency    0
Exchange    0
dtype: int64


In [8]:
# Convert 'Date' to datetime format (values are parsed as month/day/year)
exchange_rates_df['Date'] = pd.to_datetime(exchange_rates_df['Date'], format='%m/%d/%Y')

# Convert 'Exchange' to numeric. errors='coerce' replaces unparseable values
# with NaN instead of raising, so compare against the pre-conversion column to
# detect values that were present before but became NaN. Without this check
# the coercion would be silent, because the notebook's null check runs before
# this cell.
exchange_raw = exchange_rates_df['Exchange']
exchange_numeric = pd.to_numeric(exchange_raw, errors='coerce')
coerced_mask = exchange_raw.notna() & exchange_numeric.isna()
exchange_rates_df['Exchange'] = exchange_numeric

# Only report when something was actually coerced; clean data prints nothing extra
if coerced_mask.any():
    print(
        f"Warning: {coerced_mask.sum()} 'Exchange' value(s) could not be parsed "
        "as numbers and were set to NaN"
    )

# Check the data types
print(exchange_rates_df.dtypes)


Date        datetime64[ns]
Currency            object
Exchange           float64
dtype: object


In [9]:
# Gather every key-column diagnostic first, then report them together.

# Boolean mask: True for rows whose (Date, Currency) pair already appeared earlier
duplicates = exchange_rates_df.duplicated(subset=['Date', 'Currency'])

# Per-column null counts restricted to the two key columns
missing_keys = exchange_rates_df.loc[:, ['Date', 'Currency']].isna().sum()

# Number of distinct dates versus the total row count
unique_dates = exchange_rates_df['Date'].nunique()
total_records = exchange_rates_df.shape[0]

# Report the results
print(f"Number of duplicate Date and Currency combinations: {duplicates.sum()}")
print(f"Missing values in Date and Currency: {missing_keys}")
print(f"Unique Dates: {unique_dates}")
print(f"Total number of records: {total_records}")


Number of duplicate Date and Currency combinations: 0
Missing values in Date and Currency: Date        0
Currency    0
dtype: int64
Unique Dates: 2243
Total number of records: 11215


In [10]:
# Validation gate on the key columns. This cell repeats the diagnostics of the
# preceding cell and additionally asserts the invariants, so the notebook
# stops here instead of carrying malformed keys into later cells.

# Check for duplicates in the key columns
duplicates = exchange_rates_df.duplicated(subset=['Date', 'Currency'])
print(f"Number of duplicate Date and Currency combinations: {duplicates.sum()}")

# Check for any missing values in key columns
missing_keys = exchange_rates_df[['Date', 'Currency']].isnull().sum()
print(f"Missing values in Date and Currency: {missing_keys}")

# Report the number of distinct dates alongside the total row count
unique_dates = exchange_rates_df['Date'].nunique()
total_records = len(exchange_rates_df)
print(f"Unique Dates: {unique_dates}")
print(f"Total number of records: {total_records}")

# Fail fast (after the diagnostics above have been printed): every
# (Date, Currency) pair must be present and must appear exactly once.
assert not duplicates.any(), "Duplicate (Date, Currency) rows found"
assert missing_keys.sum() == 0, "Missing values found in Date/Currency key columns"


Number of duplicate Date and Currency combinations: 0
Missing values in Date and Currency: Date        0
Currency    0
dtype: int64
Unique Dates: 2243
Total number of records: 11215


In [11]:
# Write the cleaned table to disk; index=False omits the row-index column
exchange_rates_df.to_csv(
    'cleaned_Exchange_Rates.csv',
    index=False,
)