In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

# Load the comma-separated delivery records into a DataFrame
# (pass another separator here if the file is delimited differently).
data = pd.read_csv('deliverytime.txt', sep=',')

# Show the leading five rows as a quick sanity check of the parse.
print(data.head(5))


In [None]:
# Summarise the dataset (columns, non-null counts, dtypes, memory usage).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that appended a stray "None" line.
data.info()

In [None]:
# Count the missing entries of every column and print them under a heading.
null_values = data.isna().sum()
print("Null values in each column:", null_values, sep="\n")

In [None]:
import pandas as pd
import numpy as np

# Re-load the comma-separated dataset; this rebinds `data` to a fresh frame
# (change the separator here if the file is delimited differently).
data = pd.read_csv('deliverytime.txt', sep=',')

# Haversine formula to calculate the distance between two points
def haversine_vectorized(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two sets of points.

    Parameters
    ----------
    lat1, lon1, lat2, lon2 : float or array-like
        Coordinates in decimal degrees. Scalars and arrays may be mixed;
        NumPy broadcasting applies.
    radius : float, optional
        Sphere radius. The default, 6371, is the Earth's radius in
        kilometres, so the result is in kilometres.

    Returns
    -------
    float or numpy.ndarray
        Distance(s) in the unit of ``radius``.
    """
    # Convert degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points); clamp it so the square roots never yield NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius * c

# Pull the restaurant coordinates out of the frame as plain NumPy arrays.
latitudes = data['Restaurant_latitude'].values
longitudes = data['Restaurant_longitude'].values

# The restaurant in the first row serves as the fixed reference point.
reference_lat, reference_lon = latitudes[0], longitudes[0]

# Haversine distance from every restaurant to that reference, in one call.
data['distance_to_first_restaurant'] = haversine_vectorized(
    latitudes, longitudes, reference_lat, reference_lon
)

# Preview the coordinates next to the freshly computed distances.
print(
    "Updated DataFrame with distances:",
    data[['Restaurant_latitude', 'Restaurant_longitude', 'distance_to_first_restaurant']].head(),
    sep="\n",
)

In [None]:
import pandas as pd
import numpy as np

# Set the earth's radius (in kilometers); distcalculate returns the same unit
R = 6371

# Convert degrees to radians
def deg_to_rad(degrees):
    """Convert an angle (scalar or NumPy array) from degrees to radians."""
    return degrees * (np.pi / 180)

# Function to calculate the distance between two points using the haversine formula
def distcalculate(lat1, lon1, lat2, lon2):
    """Return the haversine (great-circle) distance in kilometres.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude/longitude of the first point(s), in decimal degrees.
    lat2, lon2 : float or array-like
        Latitude/longitude of the second point(s), in decimal degrees.

    Returns
    -------
    float or numpy.ndarray
        Distance(s) scaled by the module-level radius ``R``.
    """
    d_lat = deg_to_rad(lat2 - lat1)
    d_lon = deg_to_rad(lon2 - lon1)
    a = np.sin(d_lat / 2) ** 2 + np.cos(deg_to_rad(lat1)) * np.cos(deg_to_rad(lat2)) * np.sin(d_lon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] (near-antipodal points);
    # clamp it so sqrt(1 - a) never produces NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

# Load the delivery records from the comma-separated text file.
data = pd.read_csv('deliverytime.txt', sep=',')

# Pick-up (restaurant) coordinates as raw NumPy arrays ...
restaurant_latitudes = data['Restaurant_latitude'].values
restaurant_longitudes = data['Restaurant_longitude'].values
# ... and the matching drop-off (delivery location) coordinates.
delivery_latitudes = data['Delivery_location_latitude'].values
delivery_longitudes = data['Delivery_location_longitude'].values

# One vectorised haversine call covers every row at once.
distances = distcalculate(
    restaurant_latitudes,
    restaurant_longitudes,
    delivery_latitudes,
    delivery_longitudes,
)

# Keep the result on the frame as the 'distance' feature.
data['distance'] = distances

# Preview the four coordinates alongside the derived distance.
print(
    data[[
        'Restaurant_latitude',
        'Restaurant_longitude',
        'Delivery_location_latitude',
        'Delivery_location_longitude',
        'distance',
    ]].head()
)

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import statsmodels.api as sm

# Expects `data` to already hold the derived 'distance' column and the
# dataset's 'Time_taken(min)' column.
# NOTE: the target column is spelled 'Time_taken(min)' (capital T) in every
# other cell of this notebook; the lower-case spelling used here before
# raised a KeyError.

# Fit a linear regression of delivery time on distance
X = data['distance']
y = data['Time_taken(min)']
X = sm.add_constant(X)  # Adds a constant (intercept) term to the predictor
model = sm.OLS(y, X).fit()  # Fit the model
predictions = model.predict(X)  # In-sample fitted values

# Add predictions to the DataFrame for plotting
data['predicted_time'] = predictions

# Create a scatter plot with Plotly Express
fig = px.scatter(data_frame=data,
                 x='distance',
                 y='Time_taken(min)',
                 size='Time_taken(min)',  # Marker size follows the delivery time
                 title='Relationship between Distance and Time Taken',
                 labels={'distance': 'Distance (km)', 'Time_taken(min)': 'Time Taken (min)'},
                 hover_name=data.index,  # Show index on hover
                 size_max=20)  # Maximum marker size

# Add OLS trendline; rows are ordered by distance so the line is traced once
# from left to right instead of zig-zagging in row order.
trend = data.sort_values('distance')
fig.add_scatter(x=trend['distance'], y=trend['predicted_time'], mode='lines', name='OLS Trendline', line=dict(color='red'))

# Show the plot
fig.show()


In [None]:
import pandas as pd
import plotly.express as px

# Requires `data` to carry the 'distance' and 'Time_taken(min)' columns.

# Bubble scatter of delivery time versus distance; marker size also follows
# the delivery time and Plotly Express overlays an OLS trendline.
figure = px.scatter(
    data,
    x="distance",
    y="Time_taken(min)",
    size="Time_taken(min)",
    trendline="ols",
    title="Relationship Between Distance and Time Taken",
)

# Pin the y-axis ticks to every 10 minutes from 0 to 60 and label the axis.
figure.update_yaxes(
    title_text="Time Taken (min)",
    tickvals=list(range(0, 61, 10)),
)

# Render the chart.
figure.show()


In [None]:
import pandas as pd
import plotly.express as px

# Requires `data` to carry 'Delivery_person_Age', 'Time_taken(min)' and 'distance'.

# Delivery time against courier age: marker size follows the time, colour
# follows the distance, and an OLS trendline is overlaid.
figure_age_time = px.scatter(
    data,
    x="Delivery_person_Age",
    y="Time_taken(min)",
    color="distance",
    size="Time_taken(min)",
    trendline="ols",
    title="Relationship Between Time Taken and Age",
    labels={
        "Delivery_person_Age": "Delivery Person Age",
        "Time_taken(min)": "Time Taken (min)",
        "distance": "Distance (km)",
    },
)

# Y-axis ticks every 10 minutes, from 0 up to the first step past the maximum.
max_time = data["Time_taken(min)"].max()
tickvals = [*range(0, int(max_time) + 10, 10)]
figure_age_time.update_yaxes(title_text="Time Taken (min)", tickvals=tickvals)

# Render the chart.
figure_age_time.show()


In [None]:
import pandas as pd
import plotly.express as px

# Requires `data` to carry 'Delivery_person_Ratings', 'Time_taken(min)' and
# 'distance' (the latter drives the colour scale).

# Delivery time against courier rating, sized by time and coloured by
# distance, with an OLS trendline.
figure_ratings_time = px.scatter(
    data,
    x="Delivery_person_Ratings",
    y="Time_taken(min)",
    color="distance",
    size="Time_taken(min)",
    trendline="ols",
    title="Relationship Between Time Taken and Delivery Person Ratings",
    labels={
        "Delivery_person_Ratings": "Delivery Person Ratings",
        "Time_taken(min)": "Time Taken (min)",
        "distance": "Distance (km)",
    },
)

# Tick marks at 10-minute steps covering the observed range of delivery times.
max_time = data["Time_taken(min)"].max()
tickvals = [*range(0, int(max_time) + 10, 10)]
figure_ratings_time.update_yaxes(title_text="Time Taken (min)", tickvals=tickvals)

# Render the chart.
figure_ratings_time.show()


In [None]:
import pandas as pd
import plotly.express as px

# Requires `data` to carry 'Type_of_vehicle', 'Time_taken(min)' and 'Type_of_order'.

# Distribution of delivery time per vehicle type, with one coloured box per
# order type inside each vehicle group.
figure_box = px.box(
    data,
    x="Type_of_vehicle",
    y="Time_taken(min)",
    color="Type_of_order",
    title="Box Plot of Time Taken by Type of Vehicle and Order Type",
    labels={
        "Type_of_vehicle": "Type of Vehicle",
        "Time_taken(min)": "Time Taken (min)",
        "Type_of_order": "Type of Order",
    },
)

# Render the chart.
figure_box.show()


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Requires `data` to hold the three feature columns and the target column.
# Feature matrix (age, rating, distance) and regression target as arrays.
X = np.array(data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']])
y = np.array(data['Time_taken(min)'])

# Hold out 10 % of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.10, random_state=42
)

# LSTM layers consume (samples, time steps, features). Each row is a static
# feature vector, so it is presented as a sequence of length one by inserting
# a size-1 axis in the middle.
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

# One LSTM layer (50 units, ReLU) feeding a single-unit output for regression.
model = Sequential([
    LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1),
])

# Train against mean squared error with the Adam optimiser.
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load your data into a DataFrame
data = pd.read_csv('deliverytime.txt')  # Replace with your actual file path

# Re-reading the file discards any 'distance' column that only existed in
# memory. Rebuild it from the coordinates when the file does not provide one,
# using the haversine helper `distcalculate` defined in an earlier cell
# (otherwise the feature selection below raises a KeyError).
if 'distance' not in data.columns:
    data['distance'] = distcalculate(
        data['Restaurant_latitude'].values,
        data['Restaurant_longitude'].values,
        data['Delivery_location_latitude'].values,
        data['Delivery_location_longitude'].values,
    )

# Prepare the features and target variable
X = np.array(data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']])  # Features
y = np.array(data['Time_taken(min)'])  # Target variable

# Split the data into training (80 %) and testing (20 %) sets, reproducibly
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load your data into a DataFrame
data = pd.read_csv('deliverytime.txt')  # Replace with your actual file path

# Print column names to debug
print(data.columns)

# Strip whitespace from column names (must happen before any column lookup)
data.columns = data.columns.str.strip()

# Re-reading the file discards any 'distance' column that only existed in
# memory. Rebuild it from the coordinates when the file does not provide one,
# using the haversine helper `distcalculate` defined in an earlier cell
# (otherwise the feature selection below raises a KeyError).
if 'distance' not in data.columns:
    data['distance'] = distcalculate(
        data['Restaurant_latitude'].values,
        data['Restaurant_longitude'].values,
        data['Delivery_location_latitude'].values,
        data['Delivery_location_longitude'].values,
    )

# Prepare the features and target variable
X = np.array(data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']])
y = np.array(data['Time_taken(min)'])  # Target variable

# Split the data into training (80 %) and testing (20 %) sets, reproducibly
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
import pandas as pd
import numpy as np

# Set the earth's radius (in kilometers); distcalculate returns the same unit
R = 6371

# Convert degrees to radians
def deg_to_rad(degrees):
    """Convert an angle (scalar or NumPy array) from degrees to radians."""
    return degrees * (np.pi / 180)

# Function to calculate the distance between two points using the haversine formula
def distcalculate(lat1, lon1, lat2, lon2):
    """Return the haversine (great-circle) distance in kilometres.

    All four arguments are coordinates in decimal degrees and may be scalars
    or array-likes (NumPy broadcasting applies). The result is scaled by the
    module-level radius ``R``.
    """
    d_lat = deg_to_rad(lat2 - lat1)
    d_lon = deg_to_rad(lon2 - lon1)
    a = np.sin(d_lat / 2) ** 2 + np.cos(deg_to_rad(lat1)) * np.cos(deg_to_rad(lat2)) * np.sin(d_lon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] (near-antipodal points);
    # clamp it so sqrt(1 - a) never produces NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

# Load the delivery records from the comma-separated text file.
data = pd.read_csv('deliverytime.txt', sep=',')

# Restaurant (origin) coordinates as NumPy arrays.
restaurant_latitudes = data['Restaurant_latitude'].values
restaurant_longitudes = data['Restaurant_longitude'].values
# Delivery-location (destination) coordinates as NumPy arrays.
delivery_latitudes = data['Delivery_location_latitude'].values
delivery_longitudes = data['Delivery_location_longitude'].values

# Haversine distance for every origin/destination pair in a single call.
distances = distcalculate(
    restaurant_latitudes,
    restaurant_longitudes,
    delivery_latitudes,
    delivery_longitudes,
)

# Store the result on the frame as the 'distance' feature.
data['distance'] = distances

# Show the coordinates next to the derived distance for the first rows.
print(
    data[[
        'Restaurant_latitude',
        'Restaurant_longitude',
        'Delivery_location_latitude',
        'Delivery_location_longitude',
        'distance',
    ]].head()
)

In [None]:
import pandas as pd

# Read the dataset (point this at your own file if it lives elsewhere).
data = pd.read_csv('deliverytime.txt')

# Show the header exactly as parsed, to spot stray whitespace in the names.
print(f"Columns in the DataFrame: {data.columns}")

# Remove leading/trailing whitespace from every column name.
data.columns = data.columns.str.strip()

# Show the header again to confirm the clean-up took effect.
print(f"Stripped Columns: {data.columns}")


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load your data into a DataFrame
data = pd.read_csv('deliverytime.txt')  # Replace with your actual file path

# Strip whitespace from column names
data.columns = data.columns.str.strip()

# The derived feature is called 'distance' in every other cell of this
# notebook. The previous check looked for 'distances', which never exists, so
# the train/test split below was silently skipped.
if 'distance' in data.columns:
    print("Distance column found!")
else:
    print("Distance column not found. Available columns:", data.columns.tolist())
    # Rebuild the feature from the coordinates with the haversine helper
    # `distcalculate` defined in an earlier cell.
    data['distance'] = distcalculate(
        data['Restaurant_latitude'].values,
        data['Restaurant_longitude'].values,
        data['Delivery_location_latitude'].values,
        data['Delivery_location_longitude'].values,
    )

# Print the first few rows to confirm the distance column is present
print(data.head())

# Prepare the features and target variable
X = np.array(data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']])
y = np.array(data['Time_taken(min)'])  # Target variable

# Split the data into training (80 %) and testing (20 %) sets, reproducibly
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
import pandas as pd
import numpy as np

# Read the dataset (adjust the path for your own copy) and, in the same
# step, trim surrounding whitespace from every column name.
data = pd.read_csv('deliverytime.txt').rename(columns=str.strip)

# Define the Haversine function
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Parameters
    ----------
    lat1, lon1, lat2, lon2 : float, numpy.ndarray or pandas.Series
        Coordinates in decimal degrees; element-wise broadcasting applies.

    Returns
    -------
    Same kind as the inputs
        Distance(s) on a sphere of radius 6371 km.
    """
    # Convert degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Rounding can push `a` marginally above 1 (near-antipodal points), which
    # would make arcsin return NaN; clamp it to the valid range first.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # Radius of Earth in kilometers
    return c * r

# Derive the restaurant -> delivery-location distance for every row and keep
# it on the frame as the 'distance' column.
data['distance'] = haversine(
    lat1=data['Restaurant_latitude'],
    lon1=data['Restaurant_longitude'],
    lat2=data['Delivery_location_latitude'],
    lon2=data['Delivery_location_longitude'],
)

# Confirm the new column exists and show it next to the record ID.
print("Distance column added. Available columns:", list(data.columns))
print(data.loc[:, ['ID', 'distance']].head())


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# Read the dataset (adjust the path for your own copy) and, in the same
# step, trim surrounding whitespace from every column name.
data = pd.read_csv('deliverytime.txt').rename(columns=str.strip)

# Define the Haversine function (if not already defined)
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    All arguments are coordinates in decimal degrees (scalars, NumPy arrays
    or pandas Series; element-wise broadcasting applies). The result is the
    distance on a sphere of radius 6371 km.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Rounding can push `a` marginally above 1 (near-antipodal points), which
    # would make arcsin return NaN; clamp it to the valid range first.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # Radius of Earth in kilometers
    return c * r

# Derive the restaurant -> delivery-location distance for every row.
data['distance'] = haversine(
    lat1=data['Restaurant_latitude'],
    lon1=data['Restaurant_longitude'],
    lat2=data['Delivery_location_latitude'],
    lon2=data['Delivery_location_longitude'],
)

# Feature matrix (age, rating, distance) and regression target as arrays.
X = np.array(data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']])
y = np.array(data['Time_taken(min)'])

# 80/20 train/test split with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# LSTM input layout is (samples, time steps, features); every row becomes a
# one-step sequence by inserting a size-1 axis in the middle.
X_train = X_train[:, np.newaxis, :]
X_test = X_test[:, np.newaxis, :]

# A 20-unit ReLU LSTM layer followed by a single output unit.
model = Sequential([
    LSTM(20, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1),
])


In [None]:
pip list

In [None]:
pip install keras.models

In [None]:
pip install keras.layers

In [None]:
pip install keras

In [None]:
pip install tensorflow

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# Load your data into a DataFrame
data = pd.read_csv('deliverytime.txt')  # Replace with your actual file path

# Strip whitespace from column names
data.columns = data.columns.str.strip()

# Re-reading the file discards any 'distance' column that only existed in
# memory. Rebuild it from the coordinates when the file does not provide one,
# using the haversine helper `distcalculate` defined in an earlier cell
# (otherwise the feature selection below raises a KeyError).
if 'distance' not in data.columns:
    data['distance'] = distcalculate(
        data['Restaurant_latitude'].values,
        data['Restaurant_longitude'].values,
        data['Delivery_location_latitude'].values,
        data['Delivery_location_longitude'].values,
    )

# Feature matrix (age, rating, distance) and regression target
X = np.array(data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']])
y = np.array(data['Time_taken(min)'])

# Split the data (80 % train / 20 % test, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape for LSTM: (samples, time steps = 1, features)
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Build the LSTM model: 50-unit LSTM, 20 % dropout, single regression output
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model; the held-out test split doubles as the validation set here
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_data=(X_test, y_test))

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load your data
data = pd.read_csv('deliverytime.txt')  # Replace with your actual file path
data.columns = data.columns.str.strip()

# Re-reading the file discards any 'distance' column that only existed in
# memory. Rebuild it from the coordinates when the file does not provide one,
# using the haversine helper `distcalculate` defined in an earlier cell
# (otherwise the feature selection below raises a KeyError).
if 'distance' not in data.columns:
    data['distance'] = distcalculate(
        data['Restaurant_latitude'].values,
        data['Restaurant_longitude'].values,
        data['Delivery_location_latitude'].values,
        data['Delivery_location_longitude'].values,
    )

# Prepare features and target variable
X = data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']]
y = data['Time_taken(min)']

# Split the data (80 % train / 20 % test, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model (100 trees, fixed seed for repeatable results)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


In [None]:
import pandas as pd
import numpy as np

# Read the dataset (adjust the path for your own copy) and, in the same
# step, trim surrounding whitespace from every column name.
data = pd.read_csv('deliverytime.txt').rename(columns=str.strip)

# Define the Haversine function
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    All arguments are coordinates in decimal degrees (scalars, NumPy arrays
    or pandas Series; element-wise broadcasting applies). The result is the
    distance on a sphere of radius 6371 km.
    """
    # Convert degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Rounding can push `a` marginally above 1 (near-antipodal points), which
    # would make arcsin return NaN; clamp it to the valid range first.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # Radius of Earth in kilometers
    return c * r

# Calculate the distance and add it as a new column
data['distance'] = haversine(data['Restaurant_latitude'], data['Restaurant_longitude'],
                              data['Delivery_location_latitude'], data['Delivery_location_longitude'])

# Verify that the distance column has been added
print("Distance column added. Available columns:", data.columns.tolist())
print(data[['ID', 'distance']].head())  # Display the ID and distance columns

# The former `data.apply(calculate_distance, axis=1)` line was dropped:
# `calculate_distance` is not defined at this point in the notebook (NameError
# on a fresh kernel) and it would only have overwritten the haversine
# distance computed above.

# Save the updated DataFrame back to the text file. pandas has no
# DataFrame.to_txt(); to_csv() writes the comma-separated layout that
# read_csv() loads above.
data.to_csv('deliverytime.txt', index=False)

In [None]:
# Build the feature matrix / target and split them 90 % train, 10 % test.
from sklearn.model_selection import train_test_split
x = np.array(data[["Delivery_person_Age", "Delivery_person_Ratings", "distance"]])
y = np.array(data[["Time_taken(min)"]])  # double brackets keep y two-dimensional
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.10, random_state=42
)

# Stacked LSTM regressor: the first LSTM returns the full sequence so the
# second one can consume it; two dense layers then reduce to one output.
from keras.models import Sequential
from keras.layers import Dense, LSTM
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(xtrain.shape[1], 1)),
    LSTM(64, return_sequences=False),
    Dense(25),
    Dense(1),
])
model.summary()

In [None]:
conda update mkl

In [None]:
conda install numpy

In [None]:
conda install -c intel mkl

In [None]:
conda install tensorflow

In [None]:
conda install sklearn.model_selection

In [None]:
import os
# Sets the KMP_DUPLICATE_LIB_OK environment variable to the string 'True' for
# this process and anything it spawns.
# NOTE(review): presumably a workaround for the OpenMP "duplicate runtime"
# abort seen with some MKL / TensorFlow installs -- confirm it is still
# needed, and that it runs before those libraries are imported.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [None]:
conda install -c anaconda keras

In [None]:
pip uninstall mpi4py

In [None]:
# Assemble features and target, then hold out 10 % of the rows for testing.
from sklearn.model_selection import train_test_split
x = np.array(data[["Delivery_person_Age", "Delivery_person_Ratings", "distance"]])
y = np.array(data[["Time_taken(min)"]])  # double brackets keep y two-dimensional
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.10, random_state=42
)

# Two stacked LSTM layers (128 -> 64 units) followed by dense layers of 25
# and 1 units; the first LSTM must return sequences to feed the second.
from keras.models import Sequential
from keras.layers import Dense, LSTM
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(xtrain.shape[1], 1)),
    LSTM(64, return_sequences=False),
    Dense(25),
    Dense(1),
])
model.summary()

In [None]:
import pandas as pd
import numpy as np

# Read the existing dataset (adjust the path for your own copy) and trim
# surrounding whitespace from the column names in the same step.
data = pd.read_csv('deliverytime.txt').rename(columns=str.strip)

# Display the header to see which columns are available.
print(data.columns)

# Row-wise distance between the restaurant and the delivery location
def calculate_distance(row):
    """Return the haversine distance in kilometres for one dataset row.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series from ``DataFrame.apply(axis=1)``)
        Must provide 'Restaurant_latitude', 'Restaurant_longitude',
        'Delivery_location_latitude' and 'Delivery_location_longitude' in
        decimal degrees -- the coordinate columns used throughout this
        notebook. The previous placeholder names ('start_latitude',
        'end_latitude', ...) are not used anywhere else here and would raise
        a KeyError.

    Returns
    -------
    float
        Great-circle distance on a sphere of radius 6371 km. The earlier
        Euclidean formula on raw degrees did not yield a physical distance.
    """
    lat1, lon1, lat2, lon2 = np.radians([
        row['Restaurant_latitude'],
        row['Restaurant_longitude'],
        row['Delivery_location_latitude'],
        row['Delivery_location_longitude'],
    ])
    a = np.sin((lat2 - lat1) / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    # Clamp against rounding so arcsin never receives a value above 1.
    a = np.clip(a, 0.0, 1.0)
    return 6371 * 2 * np.arcsin(np.sqrt(a))

# Add the distance column: calculate_distance is called once per row (axis=1)
data['distance'] = data.apply(calculate_distance, axis=1)

# Save the updated DataFrame back to the text file
# NOTE(review): this overwrites 'deliverytime.txt', the same file this cell
# read from, so later reads of that file will include the 'distance' column.
# Keep a copy of the raw file if the original is still needed.
data.to_csv('deliverytime.txt', index=False)

# Proceed with your model code
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Prepare features and target variable
X = data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']]
y = data['Time_taken(min)']

# Split the data: 20 % held out for testing, fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model (100 trees, fixed seed); note this rebinds the
# notebook-wide name `model`
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model with mean squared error on the test split
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


In [None]:
conda update mkl

In [None]:
# NOTE: this cell expects `model` to be the Keras LSTM network and
# `X_train_reshaped`, `X_test_reshaped`, `y_train`, `y_test` to exist. In this
# notebook they are created by the cell that reshapes the data and builds the
# LSTM model, so run that cell before this one.

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Fit the model
model.fit(X_train_reshaped, y_train, epochs=10, batch_size=32)

# Evaluate the model on the held-out split
loss = model.evaluate(X_test_reshaped, y_test)
print("Test Loss:", loss)

# Prediction based on user input
a = int(input("Age of Delivery Partner: "))
b = float(input("Ratings of Previous Deliveries: "))
# Distance is a continuous quantity; int() rejected inputs such as "7.5".
c = float(input("Total Distance: "))

# Prepare the features for prediction (double brackets -> one row, 3 columns)
features = np.array([[a, b, c]])

# Reshape the features for LSTM: (samples, time steps = 1, features)
features_reshaped = features.reshape(features.shape[0], 1, features.shape[1])

# Make the prediction
predicted_time = model.predict(features_reshaped)

print("Predicted Delivery Time in Minutes:", predicted_time[0][0])


In [None]:
pip install --upgrade tensorflow keras

In [None]:
python3 -m venv env
source env/bin/activate
pip install pandas numpy scikit-learn tensorflow

In [None]:
python3 -m venv env
source env/bin/activate

In [None]:
pip install pandas numpy scikit-learn tensorflow

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv('deliverytime.txt', delimiter=',')

# Check the column names
print(data.columns)

# Re-reading the file discards any 'distance' column that only existed in
# memory. Rebuild it from the coordinates when the file does not provide one,
# using the haversine helper `distcalculate` defined in an earlier cell
# (otherwise the feature selection below raises a KeyError).
if 'distance' not in data.columns:
    data['distance'] = distcalculate(
        data['Restaurant_latitude'].values,
        data['Restaurant_longitude'].values,
        data['Delivery_location_latitude'].values,
        data['Delivery_location_longitude'].values,
    )

# Define features (X) and target (y); double brackets keep y two-dimensional
X = np.array(data[['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']])
y = np.array(data[['Time_taken(min)']])

# Split the dataset into training (90 %) and testing (10 %) sets, reproducibly
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive using the Colab helper imported above.
# NOTE(review): this only works inside Google Colab and presumably prompts
# for authorisation -- confirm the notebook still needs Drive access, since
# the visible cells read 'deliverytime.txt' from the working directory.
drive.mount('/content/drive')

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# LSTM layers consume (samples, time steps, features). The features have no
# time dimension, so each row is presented as a one-step sequence by
# inserting a size-1 axis in the middle.
X_train_reshaped = X_train[:, None, :]
X_test_reshaped = X_test[:, None, :]

# Stacked LSTM regressor: 128 -> 64 LSTM units, then dense layers of 25 and 1
# units. The first LSTM returns the whole sequence so the second can read it.
model = Sequential([
    LSTM(128, return_sequences=True,
         input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])),
    LSTM(64, return_sequences=False),
    Dense(25),
    Dense(1),
])

# Train against mean squared error with the Adam optimiser.
model.compile(loss='mean_squared_error', optimizer='adam')

# Print the layer-by-layer summary.
model.summary()

# Fit for 10 epochs in mini-batches of 32 (tune both as needed).
model.fit(X_train_reshaped, y_train, batch_size=32, epochs=10)

# Report the loss on the held-out test split.
loss = model.evaluate(X_test_reshaped, y_test)
print("Test Loss:", loss)


In [None]:
from google.colab import files

# This will prompt you to upload the file from your local machine; the result
# is indexed by uploaded filename and holds each file's raw bytes.
uploaded = files.upload()

# Once uploaded, load the dataset using pandas
import pandas as pd
import io

# Fail with a clear message when the upload dialog was dismissed.
if not uploaded:
    raise ValueError("No file was uploaded; cannot load the dataset.")

# Prefer 'deliverytime.txt'; if the uploaded file ended up under a different
# name, fall back to the first uploaded file instead of raising a KeyError.
filename = 'deliverytime.txt' if 'deliverytime.txt' in uploaded else next(iter(uploaded))
data = pd.read_csv(io.BytesIO(uploaded[filename]), delimiter=',')
