In [1]:
# Import libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split


# Load the scraped data
df = pd.read_csv("delhiclean.csv")

# Define the features and target
X = df[["distance", "ratings", "wifi"]]
y = df["price"]

# Normalize the features
X = (X - X.mean()) / X.std()

# Reshape the data into sequences of length 10
# Each sequence contains 10 consecutive days of data
X_seq = np.array([X[i:i+10] for i in range(len(X)-10)])
y_seq = np.array([y[i+10] for i in range(len(y)-10)])

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

# Create and fit the model
model = keras.Sequential([
  layers.SimpleRNN(32, activation="relu", input_shape=(10, 3)),
  layers.Dense(1)
])
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model performance
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"MSE: {mse:.2f}")
print(f"R2: {r2:.2f}")

# Predict the price for the next day
# Assume we have a new hotel with distance = 5 km, ratings = 3, and wifi = 1
new_hotel = np.array([[5, 3, 1]])
new_hotel = (new_hotel - X.mean()) / X.std()
new_hotel = new_hotel.reshape(1, 3, 1)
new_hotel = new_hotel.reshape(1, 1, 3)
new_price = model.predict(new_hotel)
print(f"Predicted price for the new hotel: {new_price[0][0]:.2f}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
MSE: 1686827.64
R2: -2.24


ValueError: Length of values (1) does not match length of index (3)

In [6]:
# Import libraries
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Load the scraped data
df = pd.read_csv("delhiclean.csv")

# Define the features and target
X = df[["distance", "ratings", "wifi"]]
y = df["price"]

# Normalize the features
X = (X - X.mean()) / X.std()

# Reshape the data into sequences of length 10
# Each sequence contains 10 consecutive days of data
X_seq = np.array([X[i:i+10] for i in range(len(X)-10)])
y_seq = np.array([y[i+10] for i in range(len(y)-10)])

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_seq, test_size=0.2, random_state=2)

# Create and fit the model
model = keras.Sequential([
    layers.SimpleRNN(32, activation="relu", input_shape=(10, 3)),
    layers.Dense(1)
])
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Get the user-defined price range
lower_price_limit = float(input("Enter lower price limit: "))
upper_price_limit = float(input("Enter upper price limit: "))

# Evaluate the model performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"MSE: {mse:.2f}")
print(f"R2: {r2:.2f}")

# Predict the price for the next day
# Assume we have a new hotel with distance = 5 km, ratings = 3, and wifi = 1
#new_hotel = np.array([[5, 3, 1]])
#new_hotel = (new_hotel - X.mean()) / X.std()
#new_hotel = new_hotel.reshape(1, 10, 3)
#new_price = model.predict(new_hotel)

# Check if the predicted price is within the user-defined price range
#if lower_price_limit <= new_price[0][0] <= upper_price_limit:
#    print(f"Predicted price for the new hotel: {new_price[0][0]:.2f}")
#else:
 #   print(
  #      f"Sorry, the predicted price of {new_price[0][0]:.2f} is not within your price range.")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
MSE: 1269131.40
R2: -2.70


In [None]:
#added more features
import requests 
from bs4 import BeautifulSoup as Soup
import pandas as pd
def delhi():
    df=pd.DataFrame()
    headers = {'User-Agent': 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'}
    for page in range (1,6):
        req = requests.get("https://www.oyorooms.com/hotels-in-delhi/", headers=headers).text
        soup = Soup(req, 'html.parser')
        apts = soup.find_all("div", {"class": "oyo-row oyo-row--no-spacing listingHotelDescription"})
        l = []
        obj = {}
        apts1 = soup.find_all(itemprop='url')
        for a in range(0, len(apts)):
            try:
                obj["pricing"] = apts[a].find("span", {"class": "listingPrice__finalPrice"}).text
            except: obj["pricing"] = None
            try:
                obj["Distance"] = apts[a].find("span", {"class": "listingHotelDescription__distanceText"}).text
            except:
                obj["Distance"] = None
            try:
                link= apts1[a].get('content')
                req1 = requests.get(link, headers=headers).text
                soup2 = Soup(req1, 'html.parser')
                obj["amneties"] = soup2.find("div", {"class": "c-2cojj"}).text
            except:
                obj["amneties"] = None
            try:
                obj["ratings"] = apts[a].find("div", {"class": "hotelRating"}).text
            except:
                obj["ratings"] = None
            
            l.append(obj)
            obj = {}
        for i in l:
            price = i["pricing"]
            distance = i["Distance"]
            amneties= i["amneties"]
            ratings= i["ratings"]
            df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ignore_index=True)
#dataCleaning
    df["price"] = df["price"].str.replace(r"₹", "")
    df["ratings"] = df["ratings"].str.replace(r"₹", "")
    df['price'] = pd.to_numeric(df['price'])
    df['distance']=df['distance'].str.replace(r"km","")
    df['distance'] = pd.to_numeric(df['distance'])
    df[['ratings', 'last_name','y','x']] = df['ratings'].str.split(' ', expand=True)
    df = df.drop(columns=['last_name','y','x'],axis=1)
    df['ratings'] = pd.to_numeric(df['ratings'],errors='coerce')
    import re
    df['wifi'] = df['amneties'].str.extract(r'(Wifi)', expand=False,flags=re.IGNORECASE)
    df['AC'] = df['amneties'].str.extract(r'(AC)', expand=False)
    df['amneties']=df['amneties'].str.replace(r"CCTV","")
    df['TV'] = df['amneties'].str.extract(r'(TV)', expand=False)
    df['Queen Sized Bed'] = df['amneties'].str.extract(r'(Queen Sized Bed)', expand=False)
    df['Geyser'] = df['amneties'].str.extract(r'(Geyser)', expand=False)
    df['King Sized Bed'] = df['amneties'].str.extract(r'(King Sized Bed)', expand=False)
    df['Power backup'] = df['amneties'].str.extract(r'(Power backup)', expand=False)
    df['Mini Fridge'] = df['amneties'].str.extract(r'(Mini Fridge)', expand=False)
    df= df.drop(columns=['amneties'],axis= 1)
    df['AC'] = df['AC'].fillna('').replace({'': 0, '[^0]': 1}, regex=True)
    df['TV'] = df['TV'].fillna('').replace({'': 0, '[^0]': 1}, regex=True)
    df['wifi'] = df['wifi'].fillna('').replace({'': 0, '[^0]': 1}, regex=True)
    df['Queen Sized Bed'] = df['Queen Sized Bed'].fillna('').replace({'': 0, '[^0]': 1}, regex=True)
    df['King Sized Bed'] = df['King Sized Bed'].fillna('').replace({'': 0, '[^0]': 1}, regex=True)
    df['Geyser'] = df['Geyser'].fillna('').replace({'': 0, '[^0]': 1}, regex=True)
    df['Power backup'] = df['Power backup'].fillna('').replace({'': 0, '[^0]': 1}, regex=True)
    df['Mini Fridge'] = df['Mini Fridge'].fillna('').replace({'': 0, '[^0]': 1}, regex=True)
    df['Mini Fridge'] = df['Mini Fridge'].fillna('').replace({'': 0, '[^0]': 1}, regex=True)
    df['ratings'] = df['ratings'].fillna(df['ratings'].mean())
    return df
df= delhi()
df

  df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ignore_index=True)
  df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ignore_index=True)
  df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ignore_index=True)
  df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ignore_index=True)
  df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ignore_index=True)
  df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ignore_index=True)
  df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ignore_index=True)
  df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ignore_index=True)
  df = df.append({"price": price,"distance": distance,"amneties":amneties,"ratings":ratings}, ig

Unnamed: 0,price,distance,ratings,wifi,AC,TV,Queen Sized Bed,Geyser,King Sized Bed,Power backup,Mini Fridge
0,576,6.5,3.100000,1,1,1,0,0,0,0,0
1,649,3.7,3.200000,1,1,1,0,0,0,1,0
2,2173,3.4,5.000000,1,1,1,0,0,0,0,1
3,999,3.6,3.800000,1,1,1,0,1,0,1,0
4,892,3.6,2.700000,1,1,1,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
95,1499,5.0,4.000000,1,1,1,0,1,0,1,0
96,946,4.5,3.576471,1,1,1,0,1,0,1,0
97,722,2.0,3.576471,1,1,1,0,1,0,1,0
98,674,7.0,2.700000,1,1,1,0,0,0,0,0
