In [1]:
import os
import pandas as pd

In [2]:
def load_data(file_path):
    """
    Load data from a CSV file into a pandas DataFrame.
    
    Parameters:
    file_path (str): The path to the CSV file.
    
    Returns:
    pd.DataFrame: DataFrame containing the loaded data.
    """
    try:
        data = pd.read_csv(file_path, sep='\t', encoding='utf-8')
        return data
    except Exception as e:
        print(f"Error loading data: {e}")
        return None

In [3]:
data = load_data("D:\Tasks\Record_Filter\marketing_campaign.csv")

In [4]:
data.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
0,5524,1957,Graduation,Single,58138.0,0,0,04-09-2012,58,635,...,7,0,0,0,0,0,0,3,11,1
1,2174,1954,Graduation,Single,46344.0,1,1,08-03-2014,38,11,...,5,0,0,0,0,0,0,3,11,0
2,4141,1965,Graduation,Together,71613.0,0,0,21-08-2013,26,426,...,4,0,0,0,0,0,0,3,11,0
3,6182,1984,Graduation,Together,26646.0,1,0,10-02-2014,26,11,...,6,0,0,0,0,0,0,3,11,0
4,5324,1981,PhD,Married,58293.0,1,0,19-01-2014,94,173,...,5,0,0,0,0,0,0,3,11,0


In [5]:
Row_Count, Column_Count = data.shape
print(f"Row Count: {Row_Count}, Column Count: {Column_Count}")

Row Count: 2240, Column Count: 29


In [6]:
data.dtypes

ID                       int64
Year_Birth               int64
Education               object
Marital_Status          object
Income                 float64
Kidhome                  int64
Teenhome                 int64
Dt_Customer             object
Recency                  int64
MntWines                 int64
MntFruits                int64
MntMeatProducts          int64
MntFishProducts          int64
MntSweetProducts         int64
MntGoldProds             int64
NumDealsPurchases        int64
NumWebPurchases          int64
NumCatalogPurchases      int64
NumStorePurchases        int64
NumWebVisitsMonth        int64
AcceptedCmp3             int64
AcceptedCmp4             int64
AcceptedCmp5             int64
AcceptedCmp1             int64
AcceptedCmp2             int64
Complain                 int64
Z_CostContact            int64
Z_Revenue                int64
Response                 int64
dtype: object

In [7]:
# Convert the 'date_col' to datetime
data['Dt_Customer'] = pd.to_datetime(data['Dt_Customer'], errors='coerce')

In [8]:
data.dtypes

ID                              int64
Year_Birth                      int64
Education                      object
Marital_Status                 object
Income                        float64
Kidhome                         int64
Teenhome                        int64
Dt_Customer            datetime64[ns]
Recency                         int64
MntWines                        int64
MntFruits                       int64
MntMeatProducts                 int64
MntFishProducts                 int64
MntSweetProducts                int64
MntGoldProds                    int64
NumDealsPurchases               int64
NumWebPurchases                 int64
NumCatalogPurchases             int64
NumStorePurchases               int64
NumWebVisitsMonth               int64
AcceptedCmp3                    int64
AcceptedCmp4                    int64
AcceptedCmp5                    int64
AcceptedCmp1                    int64
AcceptedCmp2                    int64
Complain                        int64
Z_CostContac

In [9]:
Row_Count, Column_Count = data.shape
print(f"Row Count: {Row_Count}, Column Count: {Column_Count}")

Row Count: 2240, Column Count: 29


In [10]:
data['Cus_year'] = data['Dt_Customer'].dt.year.astype('Int64')
data['Cus_month'] = data['Dt_Customer'].dt.month.astype('Int64')

In [11]:
Years = data['Cus_year'].unique().tolist()
Months = data['Cus_month'].unique().tolist()
Education = data['Education'].unique().tolist()
print("Years of Customer:", Years)
print("Months of Customer", Months)
print("Education levels:", Education)

Years of Customer: [2012, 2014, <NA>, 2013]
Months of Customer [4, 8, <NA>, 10, 9, 6, 3, 11, 2, 1, 12, 5, 7]
Education levels: ['Graduation', 'PhD', 'Master', 'Basic', '2n Cycle']


In [12]:
def Create_dir(nested_path):
    try:
        os.makedirs(nested_path)
        print(f"Nested directories '{nested_path}' created successfully.")
    except FileExistsError:
        print(f"Nested directories '{nested_path}' already exist.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [14]:
for year in Years:
    for month in Months:
        for education in Education:
            _filter1 = data["Cus_year"]==year
            _filter2 = data["Cus_month"]==month
            _filter3 = data["Education"]==education
            try:
                # Create directory structure for each year, month
                Create_dir(f"D:\\Tasks\\Record_Filter\\Filtered_Data\\{year}\\{month}")

                d1 = data.where(_filter1 & _filter2 & _filter3, inplace=False)
                # Remove rows only if ALL columns in that row are null
                d1 = d1.dropna(how='all')
                d1.to_csv(f"D:\\Tasks\\Record_Filter\\Filtered_Data\\{year}\\{month}\\{education}.csv", index=False)

                print(f"Filtered data for {year}-{month} with education {education} saved.")
            except Exception as e:
                continue



Nested directories 'D:\Tasks\Record_Filter\Filtered_Data\2012\4' created successfully.
Filtered data for 2012-4 with education Graduation saved.
Nested directories 'D:\Tasks\Record_Filter\Filtered_Data\2012\4' already exist.
Filtered data for 2012-4 with education PhD saved.
Nested directories 'D:\Tasks\Record_Filter\Filtered_Data\2012\4' already exist.
Filtered data for 2012-4 with education Master saved.
Nested directories 'D:\Tasks\Record_Filter\Filtered_Data\2012\4' already exist.
Filtered data for 2012-4 with education Basic saved.
Nested directories 'D:\Tasks\Record_Filter\Filtered_Data\2012\4' already exist.
Filtered data for 2012-4 with education 2n Cycle saved.
Nested directories 'D:\Tasks\Record_Filter\Filtered_Data\2012\8' created successfully.
Filtered data for 2012-8 with education Graduation saved.
Nested directories 'D:\Tasks\Record_Filter\Filtered_Data\2012\8' already exist.
Filtered data for 2012-8 with education PhD saved.
Nested directories 'D:\Tasks\Record_Filter\Fi