In [None]:
import pandas as pd

# Country label -> CSV export holding that country's wine statistics.
# The label is what ends up in the "Country" column of the combined frame,
# so it has to be spelled correctly.
# NOTE: these are absolute paths on one specific Windows machine; they have to
# be adjusted before the notebook can run anywhere else.
csv_files = {
    "Australia": r"C:\Users\HP\Desktop\Wine_Stat\Data\Australia_Wine_Stats.csv",
    "Chile": r"C:\Users\HP\Desktop\Wine_Stat\Data\Chile_Wine_Stats.csv",
    "France": r"C:\Users\HP\Desktop\Wine_Stat\Data\France_Wine_Stats.csv",
    "Italy": r"C:\Users\HP\Desktop\Wine_Stat\Data\Italy_Wine_Stats.csv",
    # The label has no space, unlike the file name on disk.
    "NewZealand": r"C:\Users\HP\Desktop\Wine_Stat\Data\New Zealand_Wine_Stats.csv",
    "Portugal": r"C:\Users\HP\Desktop\Wine_Stat\Data\Portugal_Wine_Stats.csv",
    "Spain": r"C:\Users\HP\Desktop\Wine_Stat\Data\Spain_Wine_Stats.csv",
    "USA": r"C:\Users\HP\Desktop\Wine_Stat\Data\USA_Wine_Stats.csv"
}

# Load each per-country CSV into its own frame, tag it with its country label,
# then stack all of them into one combined frame.
dataframes = []

for country, path in csv_files.items():
    # sep=None lets the python engine sniff the delimiter of each file.
    df = pd.read_csv(path, encoding="utf-8", sep=None, engine='python')
    # Strip header whitespace BEFORE filtering, so that a padded name such as
    # " Unnamed: 0" is still recognised as a leftover index column.
    df.columns = df.columns.str.strip()  # Clean up column names
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]  # Drop index column if present
    df["Country"] = country  # Add country column
    dataframes.append(df)

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
wine_df = pd.concat(dataframes, ignore_index=True)

# Print the whole combined frame: the row/column limits are lifted and the
# display width set to 1000 for the duration of this block only.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', 1000,
):
    print(wine_df)

In [None]:
# 1. Examine structure and data types
print("\nDataFrame Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() would add a stray "None" line.
wine_df.info()

# Show basic stats (include='all' also summarises non-numeric columns)
print("\nSummary Statistics:")
print(wine_df.describe(include='all'))

# List the column names
print("\nColumns in the DataFrame:")
print(wine_df.columns.tolist())

# 2. Remove duplicates
# Row counts are taken on either side of the de-duplication so the number of
# discarded rows can be reported.
before = len(wine_df)
wine_df = wine_df.drop_duplicates()
after = len(wine_df)
print("\nRemoved {} duplicate rows.".format(before - after))

# 3. Remove rows with any null values (NaNs)
# Count every missing cell across the whole frame before anything is dropped.
null_count = wine_df.isna().sum().sum()
print("\nTotal missing values before cleaning: {}".format(null_count))

# A row is discarded as soon as it has at least one missing value.
wine_df = wine_df.dropna(axis=0, how='any')

print("Final shape after cleaning: {}".format(wine_df.shape))

# Print the cleaned frame in full, with display limits lifted for this block.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', 1000,
):
    print(wine_df)