In [26]:
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta

In [27]:
# Folder holding the "Listed Properties" CSV exports.
directory = "C:/Users/lakha/OneDrive/Documents/House Flipping - Real Life/Local/Listed Properties"

# Everything currently in that folder.
files = os.listdir(directory)

# Keep only the CSV exports, ordered newest-first by file modification time.
csv_files = sorted(
    (name for name in files if name.endswith('.csv')),
    key=lambda name: os.path.getmtime(os.path.join(directory, name)),
    reverse=True,
)

# Fail with a clear message instead of an opaque IndexError on csv_files[0].
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in {directory!r}")

# The most recently modified export is the one to analyse.
latest_csv_file = os.path.join(directory, csv_files[0])

# Keep the raw listing rows under their own name so the aggregation below can be
# re-run (or inspected) without re-reading the file.
# NOTE(review): the saved output shows a warning pointing at this read_csv call --
# presumably a mixed-dtype warning; confirm and pass explicit dtypes if so.
listed_raw = pd.read_csv(latest_csv_file)

# Number of listing rows per (Suburb, City, Province, Area).
listed_properties = (
    listed_raw
    .groupby(['Suburb', 'City', 'Province', 'Area'])['Title']
    .size()
    .reset_index(name='# of Listed Properties')
)
listed_properties.head()


  listed_properties = pd.read_csv(latest_csv_file)


Unnamed: 0,Suburb,City,Province,Area,# of Listed Properties
0,bedfordview,johannesburg,gauteng,bedford-gardens,271
1,bedfordview,johannesburg,gauteng,bedford-park,21
2,bedfordview,johannesburg,gauteng,bedfordview,332
3,bedfordview,johannesburg,gauteng,essexwold,13
4,bedfordview,johannesburg,gauteng,meadowbrook,14


In [28]:
# Folder containing the "Sold Properties" CSV exports; adjust this path for your machine.
directory = (
    'C:/Users/lakha/OneDrive/Documents/'
    'House Flipping - Real Life/Local/Sold Properties'
)

def process_csv(file):
    """Load one sold-properties CSV and keep a single row per "List ID".

    Parameters
    ----------
    file : path or file-like object
        Anything accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Rows ordered by ascending "Sold Date" (parsed to datetime), with only
        the earliest-dated row retained for each "List ID". Original row index
        labels are preserved.
    """
    sold = pd.read_csv(file)
    sold['Sold Date'] = pd.to_datetime(sold['Sold Date'])

    # Ascending order puts the oldest sale first, so keep='first' retains it.
    oldest_first = sold.sort_values(by='Sold Date')
    return oldest_first.drop_duplicates(subset='List ID', keep='first')

# Load every "Sold Properties*.csv" export in the folder; process_csv parses the
# sold dates and de-duplicates each file on "List ID".
dfs = [
    process_csv(os.path.join(directory, file))
    for file in os.listdir(directory)
    if file.endswith('.csv') and file.startswith('Sold Properties')
]

# pd.concat raises an unhelpful ValueError on an empty list; say what is missing.
if not dfs:
    raise FileNotFoundError(f"No 'Sold Properties*.csv' files found in {directory!r}")

# Stack all exports into one frame.
sold_combined = pd.concat(dfs, ignore_index=True)

# process_csv only de-duplicates within a single file. A listing that appears in
# several exports would otherwise be counted once per file, so de-duplicate again
# across the combined frame, keeping the earliest "Sold Date" per "List ID".
sold_unique = (
    sold_combined
    .sort_values(by='Sold Date', kind='stable')
    .drop_duplicates(subset='List ID', keep='first')
    .reset_index(drop=True)
)

# Keep only sales dated within the last 30 days (relative to the time of execution).
last_30_days = datetime.now() - timedelta(days=30)
sold_properties_last_30_days = sold_unique[sold_unique['Sold Date'] >= last_30_days]

# Count sold rows per "Area". Only "Area" is used as the key because the later
# merge joins on 'Area' alone.
# NOTE(review): areas sharing a name across different suburbs/cities are pooled
# together here -- confirm area names are unique in the data.
sold_properties = (
    sold_properties_last_30_days
    .groupby('Area')['Title']
    .size()
    .reset_index(name='# of Sold Properties')
)
sold_properties

Unnamed: 0,Area,# of Sold Properties
0,albertville,2
1,alveda,1
2,atholl,7
3,auckland-park,1
4,bedford-gardens,1
...,...,...
138,westdene,8
139,winchester-hills,1
140,windsor-east,9
141,windsor-west,5


In [31]:
# Attach the per-Area sold counts to the listed-property counts. A left join keeps
# every listed area even when nothing was sold there; validate= makes the merge
# fail loudly if sold_properties ever has more than one row per Area (which would
# silently duplicate listed rows).
merged_df = pd.merge(
    listed_properties,
    sold_properties,
    on='Area',
    how='left',
    validate='many_to_one',
)

# Areas without any matching sold rows come back as NaN from the left join; treat them as 0.
merged_df['# of Sold Properties'] = merged_df['# of Sold Properties'].fillna(0).astype(int)

# Inventory = listed / sold. Dividing by a zero sold count would produce inf, so
# zero counts are masked and Inventory is left missing (NaN) for those areas.
sold_counts = merged_df['# of Sold Properties']
merged_df['Inventory'] = merged_df['# of Listed Properties'] / sold_counts.where(sold_counts > 0)
merged_df

Unnamed: 0,Suburb,City,Province,Area,# of Listed Properties,# of Sold Properties,Inventory
0,bedfordview,johannesburg,gauteng,bedford-gardens,271,1,271.000000
1,bedfordview,johannesburg,gauteng,bedford-park,21,2,10.500000
2,bedfordview,johannesburg,gauteng,bedfordview,332,10,33.200000
3,bedfordview,johannesburg,gauteng,essexwold,13,1,13.000000
4,bedfordview,johannesburg,gauteng,meadowbrook,14,1,14.000000
...,...,...,...,...,...,...,...
166,sandton-and-bryanston-north,johannesburg,gauteng,sandhurst,35,5,7.000000
167,sandton-and-bryanston-north,johannesburg,gauteng,sandown,269,6,44.833333
168,sandton-and-bryanston-north,johannesburg,gauteng,strathavon,41,4,10.250000
169,sandton-and-bryanston-north,johannesburg,gauteng,wendywood,29,1,29.000000


In [33]:
# Write the merged per-area table to a CSV in the working directory, without the index column.
merged_df.to_csv(path_or_buf='Property Market 30 days.csv', index=False)