In [2]:
import pandas as pd
import glob

# Directory that holds the per-file vehicle CSV exports. The trailing slash is
# required because the glob pattern below is built by string concatenation.
csv_directory = "./PassengerVehicle_Stats/"  # Replace with the directory path

# Number of CSV files the directory is expected to contain.
expected_file_count = 9

# glob.glob() returns matches in arbitrary, filesystem-dependent order, so the
# list is sorted: the row order of the combined frame (and of the saved CSV)
# is then reproducible from one run or machine to the next.
csv_files = sorted(glob.glob(csv_directory + "*.csv"))
try:
    # Only combine when exactly the expected number of files is present.
    if len(csv_files) == expected_file_count:
        print(f"Found {len(csv_files)} CSV files.")

        # Read every file into its own DataFrame, then stack them row-wise
        # under a fresh 0..n-1 index.
        csv_data_list = [pd.read_csv(file) for file in csv_files]
        vehicles_df = pd.concat(csv_data_list, ignore_index=True)

        # Persist the combined data. The path is relative to the working
        # directory rather than csv_directory.
        vehicles_df.to_csv("combined_vehicles_data.csv", index=False)
        print("Combined DataFrame saved to 'combined_vehicles_data.csv'.")
    else:
        # NOTE: vehicles_df is not (re)assigned on this path, so cells that
        # use it afterwards will fail or see a value from an earlier run.
        print(f"Expected {expected_file_count} CSV files, but found {len(csv_files)}. Please check your directory.")

except Exception as e:
    # Report the problem, then re-raise: swallowing it would leave
    # vehicles_df undefined (or stale) and hide the traceback.
    print(f"Error occurred: {e}")
    raise

Found 9 CSV files.
Combined DataFrame saved to 'combined_vehicles_data.csv'.


In [3]:
# Column names, non-null counts and dtypes of the combined frame.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
vehicles_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16602 entries, 0 to 16601
Data columns (total 17 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Unnamed: 0                          16602 non-null  int64  
 1   Public Vehicle Number               16602 non-null  int64  
 2   Status                              16602 non-null  object 
 3   Vehicle Make                        14685 non-null  object 
 4   Vehicle Model                       14639 non-null  object 
 5   Vehicle Model Year                  14660 non-null  float64
 6   Vehicle Color                       14616 non-null  object 
 7   Vehicle Fuel Source                 16602 non-null  object 
 8   Wheelchair Accessible               16602 non-null  object 
 9   Company Name                        16602 non-null  object 
 10  Address                             14816 non-null  object 
 11  City                                14816

In [4]:
# First five records of the combined vehicles frame. Left as the cell's last
# expression so the notebook renders it as a table instead of print()'s
# line-wrapped plain text.
vehicles_df.head()

   Unnamed: 0  Public Vehicle Number    Status Vehicle Make Vehicle Model  \
0        1286                  12009  RESERVED    CHEVROLET       EXPRESS   
1        2095                  12248  INACTIVE     MERCEDES      SPRINTER   
2        7950                  13527  INACTIVE     VAN HOOL         TD925   
3        8700                  12248  INACTIVE     MERCEDES      SPRINTER   
4        9359                  13528  INACTIVE     VAN HOOL         TD925   

   Vehicle Model Year Vehicle Color Vehicle Fuel Source Wheelchair Accessible  \
0              2014.0         BLACK          Bio-Diesel                     N   
1              2010.0        SILVER          Bio-Diesel                     N   
2              2008.0           RED          Bio-Diesel                     N   
3              2010.0        SILVER          Bio-Diesel                     N   
4              2008.0           RED          Bio-Diesel                     N   

                              Company Name        

In [5]:
# Mode (most frequent value) of each qualitative column.
# Only object/category columns are selected here; numeric columns are left out.
qualitative_columns = vehicles_df.select_dtypes(include=['object', 'category']).columns

# 'Record ID' is excluded from the mode calculation (presumably a per-row
# identifier - TODO confirm). errors='ignore' keeps the cell working when that
# column is not among the selected columns instead of raising KeyError.
mode_columns = qualitative_columns.drop('Record ID', errors='ignore')

# mode() can return several rows when a column has tied modes; columns with
# fewer modes are padded with NaN.
mode_values = vehicles_df[mode_columns].mode()
print("Mode values for each qualitative variable:")
# Last expression of the cell: rendered as a table rather than wrapped text.
mode_values


Mode values for each qualitative variable:
   Status Vehicle Make Vehicle Model Vehicle Color Vehicle Fuel Source  \
0  ACTIVE       TOYOTA         CAMRY         WHITE            Gasoline   

  Wheelchair Accessible          Company Name                 Address  \
0                     N  CITY OF CHICAGO BACP  3800 N. MILWAUKEE AVE.   

      City State Taxi Affiliation Taxi Medallion License Management   
0  CHICAGO    IL   DIS-AFFILIATED                      Owner Manager  
