In [1]:
import pandas as pd
import os
from datetime import datetime

# Define the folder containing the CSV files
data_folder = r"C:\Users\prabh\Downloads\data"

# List of file names in reverse chronological order (from September 2024 to October 2023)
file_names = [
    f"{month}_{year}"
    for year in [24, 23]
    for month in ["december","november", "october","september", "august", "july", "june", "may", "april", "march", "february", "january"]
    
]

# Initialize an empty list to store DataFrames
dfs = []

# Loop through the CSV files
for file_name in file_names:
    file_path = os.path.join(data_folder, f"{file_name}.csv")

    try:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)

        # Extract month and year from the file name
        month_str, year_str = file_name.split("_")
        year = 2000 + int(year_str)  # Convert year string to integer
        month = datetime.strptime(month_str, "%B").month

        # Add the 'month' column in datetime format
        df['month'] = datetime(year, month, 1)

        # Drop specified columns (using a list comprehension for conciseness)
        columns_to_drop = [
            col for col in [
                'listing_url', 'scrape_id', 'last_scraped', 'name', 'picture_url', 'host_url', 'host_name',
                'host_thumbnail_url', 'host_picture_url', 'neighbourhood_group_cleansed', 'calendar_updated',
                'latitude', 'longitude', 'minimum_minimum_nights', 'maximum_minimum_nights',
                'minimum_maximum_nights', 'maximum_maximum_nights',  'calculated_host_listings_count_entire_homes',
                'calculated_host_listings_count_private_rooms', 'calculated_host_listings_count_shared_rooms',
                'host_listings_count', 'host_total_listings_count', 'review_scores_accuracy',
                'review_scores_cleanliness', 'review_scores_checkin', 'review_scores_communication',
                'review_scores_location', 'review_scores_value'
            ] if col in df.columns  # Check if column exists before dropping
        ]
        df = df.drop(columns=columns_to_drop)

        # Append the processed DataFrame to the list
        dfs.append(df)

    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except Exception as e:  # Catch other potential errors during file processing
        print(f"Error processing file {file_path}: {e}")

if dfs:  # Check if any DataFrames were successfully loaded
  combined_df = pd.concat(dfs, ignore_index=True)
  combined_df.to_csv(os.path.join(data_folder, "listing_oct_to_sept.csv"), index=False)

  # Created/Modified files during execution:
  print("data/listing_oct_to_sept.csv") # Corrected line
else:
  print("No CSV files were successfully processed.")

File not found: C:\Users\prabh\Downloads\data\december_24.csv
File not found: C:\Users\prabh\Downloads\data\november_24.csv
File not found: C:\Users\prabh\Downloads\data\october_24.csv
File not found: C:\Users\prabh\Downloads\data\september_23.csv
File not found: C:\Users\prabh\Downloads\data\august_23.csv
File not found: C:\Users\prabh\Downloads\data\july_23.csv
File not found: C:\Users\prabh\Downloads\data\june_23.csv
File not found: C:\Users\prabh\Downloads\data\may_23.csv
File not found: C:\Users\prabh\Downloads\data\april_23.csv
File not found: C:\Users\prabh\Downloads\data\march_23.csv
File not found: C:\Users\prabh\Downloads\data\february_23.csv
File not found: C:\Users\prabh\Downloads\data\january_23.csv
data/listing_oct_to_sept.csv


In [2]:
combined_df

Unnamed: 0,id,source,description,neighborhood_overview,host_id,host_since,host_location,host_about,host_response_time,host_response_rate,...,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,license,instant_bookable,calculated_host_listings_count,reviews_per_month,month
0,1419,previous scrape,"This large, family home is located in one of T...",The apartment is located in the Ossington stri...,1565,2008-08-08,"Vancouver, Canada","I live in Vancouver, Canada with my husband an...",,,...,0,0,2015-07-19,2017-08-07,5.00,,f,1,0.05,2024-09-01
1,8077,previous scrape,Guest room in a luxury condo with access to al...,,22795,2009-06-22,"Toronto, Canada",My husband and I have been airbnb host for alm...,,,...,0,0,2009-08-20,2013-08-27,4.84,,f,2,0.92,2024-09-01
2,26654,city scrape,"CN Tower, TIFF Bell Lightbox, Metro Convention...",There's a reason they call it the Entertainmen...,113345,2010-04-25,,Welcome to Toronto! \r\n\r\nAfter our first me...,within a few hours,100%,...,0,0,2011-01-05,2023-09-01,4.79,,f,5,0.25,2024-09-01
3,27423,city scrape,"Brand new, fully furnished studio basement apa...",,118124,2010-05-04,"Toronto, Canada",I love to travel and meet new people from arou...,,,...,2,0,2010-06-07,2024-06-29,4.93,,f,1,0.17,2024-09-01
4,30931,previous scrape,Split level waterfront condo with a breathtaki...,,22795,2009-06-22,"Toronto, Canada",My husband and I have been airbnb host for alm...,,,...,0,0,2010-08-11,2010-08-11,5.00,,f,2,0.01,2024-09-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
248518,992970297003802524,city scrape,Enjoy a stylish experience at this centrally-l...,,417398637,2021-08-07,"Toronto, Canada","I'm Niloofar, a nature-loving traveler who enj...",within an hour,100%,...,0,0,,,,STR-2302-FDWKVP,f,2,,2023-10-01
248519,992971866858411159,city scrape,Keep it simple at this peaceful and centrally ...,,261077177,2019-05-10,"Toronto, Canada","I am from Toronto,Canada",,,...,0,0,,,,,f,1,,2023-10-01
248520,993055995141079908,city scrape,You'll have a great time at this comfortable p...,,444821980,2022-02-12,,,within a few hours,100%,...,0,0,,,,STR-2309-GHLDVS,t,2,,2023-10-01
248521,993282000981641209,city scrape,This is an inexpensive no frills 3 Bedroom + D...,Bloor West Village is a residential area locat...,8280084,2013-08-20,"Toronto, Canada",This property is family owned and managed by a...,,,...,0,0,,,,,t,1,,2023-10-01


In [3]:
# Find and print unique values in the 'month' column
unique_months = combined_df['month'].unique()
print("\nUnique values in 'month' column:")
print(unique_months)


Unique values in 'month' column:
<DatetimeArray>
['2024-09-01 00:00:00', '2024-08-01 00:00:00', '2024-07-01 00:00:00',
 '2024-06-01 00:00:00', '2024-05-01 00:00:00', '2024-04-01 00:00:00',
 '2024-03-01 00:00:00', '2024-02-01 00:00:00', '2024-01-01 00:00:00',
 '2023-12-01 00:00:00', '2023-11-01 00:00:00', '2023-10-01 00:00:00']
Length: 12, dtype: datetime64[us]
