In [27]:
import os
import re
from datetime import datetime

import mysql.connector
import pandas as pd

# Function to classify Bus_Type
def classify_bus_type(bus_type):
    bus_type = bus_type.lower()
    if 'a/c' in bus_type:
        return 'A/C'
    elif 'non a/c' in bus_type:
        return 'NON A/C'
    else:
        return 'NON A/C'

# List of CSV file paths and corresponding state names
csv_files = {
    "ahemedabad.csv": "Gujarat",
    "Andhra_bus_details.csv": "Andhra Pradesh",
    "bangalore_bus_details.csv": "Karnataka",
    "chennai_bus_details.csv": "Tamil Nadu",
    "Himachal_bus_details.csv": "Himachal Pradesh",
    "Kerala_bus_details.csv": "Kerala",
    "pune_bus_routes.csv": "Maharashtra",
    "rajasthan_bus_details.csv": "Rajasthan",
    "telungana_bus_details.csv": "Telangana",
    "uttarpradesh_bus_details.csv": "Uttar Pradesh"
}

# Read each CSV file into a DataFrame and concatenate them
df_list = []
for file, state in csv_files.items():
    df = pd.read_csv(file)
    df['State'] = state  # Add state name from the file name
    df_list.append(df)

# Concatenate all DataFrames
combined_df = pd.concat(df_list, ignore_index=True)

# Clean data: remove 'INR ' from Price and extract seat numbers from strings
combined_df['Price'] = combined_df['Price'].str.replace('INR ', '')
combined_df['Seat_Availability'] = combined_df['Seat_Availability'].str.extract(r'(\d+)')

# Convert Price to decimal and Seat_Availability to integer
combined_df['Price'] = pd.to_numeric(combined_df['Price'], errors='coerce').fillna(0).astype(float)
combined_df['Seat_Availability'] = combined_df['Seat_Availability'].fillna(0).astype(int)

# Drop rows with missing Bus_Name or Bus_Type
combined_df = combined_df.dropna(subset=['Bus_Name', 'Bus_Type'])

# Add auto-increment id column
combined_df.insert(0, 'id', combined_df.index + 1)

# Classify Bus_Type
combined_df['Bus_Type'] = combined_df['Bus_Type'].apply(classify_bus_type)

# MySQL connection
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="",
    database="RED_BUS"
)

mycursor = mydb.cursor()

# Create table query
create_table_query = """CREATE TABLE IF NOT EXISTS RED_BUS_R(
    id INT AUTO_INCREMENT PRIMARY KEY,
    route_name TEXT NOT NULL,
    route_link TEXT,
    busname TEXT NOT NULL,
    bustype TEXT NOT NULL,
    departing_time VARCHAR(8) NOT NULL,
    duration TEXT NOT NULL,
    reaching_time VARCHAR(8) NOT NULL,
    star_rating FLOAT,
    price DECIMAL(10, 2) NOT NULL,
    seats_available INT NOT NULL,
    state VARCHAR(255) NOT NULL
    )"""

mycursor.execute(create_table_query)

# Step 1: Add the state column to the table if it doesn't exist (this is now included in the table definition)
# No need to alter the table since it's already included above

# Step 2: Insert data
insert_query = """
INSERT INTO RED_BUS_R (id, route_name, route_link, busname, bustype, departing_time, duration, reaching_time, star_rating, price, seats_available, state)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

# Insert each row from the DataFrame
for i, row in combined_df.iterrows():
    data = (
        row['id'],
        row['Route_Name'],
        row['Route_Link'],
        row['Bus_Name'],
        row['Bus_Type'],
        row['Departing_Time'],
        row['Duration'],
        row['Reaching_Time'],
        row['Star_Rating'],
        row['Price'],
        row['Seat_Availability'],
        row['State']  # Insert the state for each row
    )
    mycursor.execute(insert_query, data)

# Commit the changes
mydb.commit()

print("Data inserted successfully.")


Data inserted successfully.


In [21]:
import pandas as pd

# Assuming df is your combined DataFrame
distinct_bus_types =combined_df['Bus_Type'].unique()

distinct_bus_types

array(['A/C Sleeper (2+1)', 'NON A/C Sleeper (2+1)',
       'Urbania A/C Seater (2+1)', 'A/C Seater / Sleeper (2+1)',
       'VE A/C Sleeper (2+1)', 'Volvo AC Seater (2+2)',
       'Bharat Benz A/C Sleeper (2+1)', 'Volvo A/C Seater (2+2)',
       'Volvo A/C Sleeper (2+1)', 'NON AC Seater / Sleeper 2+1',
       'NON A/C Seater / Sleeper (2+2)', 'Express Non AC Seater 2+3',
       'Benz A/C Seater (2+2)', 'AC Sleeper (2+1)',
       'Bharat Benz A/C Seater (2+1)', 'Bharat Benz A/C Sleeper (1+1)',
       'Deluxe AC Seater 2+2', 'Volvo Multi Axle B9R A/C Sleeper (2+1)',
       'Bharat Benz A/C Seater (2+2)', 'A/C Seater/Sleeper (2+1)',
       'Non A/C Seater / Sleeper (2+1)', 'NON A/C Sleeper (1+2)',
       'NON AC Seater/ Sleeper (2+1)', 'NON A/C Seater/ Sleeper (2+1)',
       'Volvo Multi-Axle I-Shift B11R Semi Sleeper (2+2)',
       'Volvo Multi-Axle A/C Sleeper (2+1)',
       'Volvo Multi-Axle Sleeper A/C (2+1)',
       'A/C Volvo B11R Multi-Axle Sleeper (2+1)',
       'VE A/C Seater / 