## Import modules

In [1]:
import json
import os
import time
from datetime import datetime
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text, inspect

### Load credentials

In [2]:
# Load variables from a .env file (python-dotenv) before reading the settings.
load_dotenv()

# Database connection settings; each one is None when its variable is not set.
db_host, db_user, db_pass, db_name = (
    os.getenv(env_key)
    for env_key in ('DB_HOST', 'DB_USER', 'DB_PASSWORD', 'DB_NAME')
)


### Create the engine for connecting to the database

In [3]:
# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot be mistaken for URL delimiters when the
# connection string is parsed.
_db_user_quoted = quote(str(db_user), safe="")
_db_pass_quoted = quote(str(db_pass), safe="")

DATABASE_URL = f"mysql+pymysql://{_db_user_quoted}:{_db_pass_quoted}@{db_host}/{db_name}"
engine = create_engine(DATABASE_URL)

### Database and table information

In [4]:
# Name of the hotel table; later cells assign the same literal to `table`.
table_main = 'hotel_info_all'

### Database connection test query

In [5]:
# Connectivity check: ask the server which database this connection is using.
try:
    with engine.connect() as connection:
        result = connection.execute(text("SELECT DATABASE()"))
        # NOTE: this rebinds the module-level db_name that was read from the
        # environment in the credentials cell.
        db_name = result.fetchone()[0]
        print(f"Connected to database: {db_name}")
except Exception as e:
    # Any failure is only reported as text; nothing is re-raised.
    print(f"Error: {e}")

Connected to database: itt_master_contents


### Print the list of all existing tables in the database

In [6]:
# List every table visible through the engine.
try:
    inspector = inspect(engine)
    tables = inspector.get_table_names()
    if tables:
        print(f"Tables in the databse '{db_name}'\n\n")
        # Print in one call instead of a `for table in tables` loop: a loop
        # variable at cell level would stay bound in the notebook namespace as
        # `table`, the same global name later cells use as the query target.
        print("\n".join(tables))
    else:
        print(f"No tables found in the database '{db_name}'")
except Exception as e:
    # Any failure is only reported as text; nothing is re-raised.
    print(f"Error: {e}")


Tables in the databse 'itt_master_contents'


hotel_info_all
hotels_info_with_gidestination_code
innova_hotels_main
vervotech_ProviderFamily
vervotech_hotel_list
vervotech_hotel_map_new
vervotech_hotel_map_update
vervotech_mapping
vervotech_update_data_info


### Function to get all data from a table

In [7]:
def get_all_info_in_data_table(table, engine):
    """Return every row of ``table`` as a pandas DataFrame.

    Args:
        table: Table name; it is interpolated directly into the SQL text, so
            only pass names you control.
        engine: Connectable handed to ``pd.read_sql``.

    Returns:
        pandas.DataFrame: Result of ``SELECT *`` on the table.
    """
    return pd.read_sql(f"SELECT * FROM {table};", engine)
    
# table_main = 'hotel_info_all'
# get_data = get_all_info_in_data_table(table_main, engine)
# # print(get_data)
                                      
    

### Function to get the data of one specific row

In [8]:
def get_specifiq_data_from_system_id(table, systemid, engine):
    """Print and return the rows of ``table`` whose ``SystemId`` equals ``systemid``.

    Note: later cells in this notebook define a function with the same name;
    whichever definition ran last in the kernel is the one that gets called.

    Args:
        table: Table name; interpolated directly into the SQL text.
        systemid: Value compared against the ``SystemId`` column; also
            interpolated into the SQL text, so only pass trusted values.
        engine: Connectable handed to ``pd.read_sql``.

    Returns:
        pandas.DataFrame: The matching rows (empty when nothing matches).
    """
    query = f"SELECT * FROM {table} WHERE SystemId = '{systemid}';"
    df = pd.read_sql(query, engine)
    print(df)
    # Hand the frame back as well: the calling cell stores the result in a
    # variable, which previously always ended up as None.
    return df

# Look up one hotel row by SystemId in the hotel table; the called function
# prints the matching row.
table = 'hotel_info_all'
systemId = '67413'
data = get_specifiq_data_from_system_id(table=table, systemid=systemId, engine=engine)

# print(data)

     Id GiDestinationId                                      HotelName  \
0  7773          101196  DoubleTree by Hilton Hotel Denver Westminster   

   SystemId  Rating         City          Address1  \
0     67413     3.0  Westminster  8773 Yates Drive   

                            Address2     ZipCode  \
0  Westminster , Colorado 80031-3680  80031-3680   

                                            ImageUrl   Latitude   Longitude  \
0  https://static.giinfotech.ae/medianew/thumbnai...  39.855766 -105.051233   

     CountryName CountryCode StatusUpdate  \
0  United States          US         Done   

                                           HotelInfo StatusUpdateHotelInfo  \
0  {"name": "DoubleTree by Hilton Hotel Denver We...             Done Json   

            CreatedAt          ModifiedOn  
0 2024-11-01 18:46:42 2024-11-04 12:01:11  


### Function to get the list of SystemIds whose HotelInfo JSON update is done (country code AE)

In [9]:
def get_system_id_list(table, column, engine):
    """Return the distinct ``column`` values of rows marked 'Done Json' for country 'AE'.

    Args:
        table: Table name; interpolated directly into the SQL text.
        column: Column to select; interpolated directly into the SQL text.
        engine: Connectable handed to ``pd.read_sql``.

    Returns:
        list: Unique values in the order the query first returned them. An
        empty list is returned (and the error printed) when the query fails,
        so callers can safely take ``len()`` of the result or iterate over it.
    """
    query = f"SELECT {column} FROM {table} WHERE StatusUpdateHotelInfo = 'Done Json' AND CountryCode = 'AE';"
    try:
        df = pd.read_sql(query, engine)
        # drop_duplicates keeps first-seen order, so repeated runs produce the
        # same list instead of the arbitrary ordering of a set.
        return df[column].drop_duplicates().tolist()
    except Exception as e:
        print(f"Error fetching column info: {e}")
        return []

# Collect the SystemId values selected by get_system_id_list and report how
# many were found.
table = 'hotel_info_all'
column = 'SystemId'

list_for_system_id_data = get_system_id_list(table,column,engine)


print(len(list_for_system_id_data))

2573


### Function to create a dictionary from the row matching a SystemId

In [31]:
def get_specifiq_data_from_system_id(table, systemid, engine):
    """Print the first row of ``table`` matching ``systemid`` as a dictionary.

    When no row matches, a notice is printed instead. Nothing is returned in
    either case. Both ``table`` and ``systemid`` are interpolated directly into
    the SQL text.
    """
    rows = pd.read_sql(
        f"SELECT * FROM {table} WHERE SystemId = '{systemid}';",
        engine,
    )

    if rows.empty:
        print("No data found for the provided SystemId.")
    else:
        # Only the first matching row is shown.
        print(rows.iloc[0].to_dict())
    return None

# Demo call: show the row for SystemId 1081355 (`table` was set in an earlier cell).
get_specifiq_data_from_system_id(table,'1081355', engine)

{'Id': 9772, 'GiDestinationId': '101653', 'HotelName': 'Motel Bienvenue', 'SystemId': 1081355, 'Rating': 0.0, 'City': 'Rimouski', 'Address1': '1057 rue du Phare', 'Address2': 'Rimouski , Québec G5M 1L9', 'ZipCode': 'G5M 1L9', 'ImageUrl': '', 'Latitude': 48.445, 'Longitude': -68.5591, 'CountryName': 'Canada', 'CountryCode': 'CA', 'StatusUpdate': 'Done', 'HotelInfo': '{"name": "Motel Bienvenue", "rating": 0, "address": {"line1": "1057 rue du Phare", "line2": "Rimouski , Québec G5M 1L9", "zipCode": "G5M 1L9", "cityCode": null, "cityName": "Rimouski", "stateCode": "", "stateName": null, "countryCode": "CA", "countryName": "Canada"}, "checkIn": null, "contact": {"faxNo": null, "phoneNo": "+14187244338", "website": "https://www.motelbienvenue.com/en/"}, "geocode": {"lat": 48.445, "lon": -68.5591}, "checkOut": null, "imageUrl": "", "systemId": "1081355", "imageUrls": [], "attractions": null, "currencyCode": null, "tripAdvisorUrl": null, "giDestinationId": 7511, "tripAdvisorRating": 0, "master

### Function to print the row matching a SystemId as JSON

In [39]:
def get_specifiq_data_from_system_id(table, systemid, engine):
    """Print the first row of ``table`` matching ``systemid`` as indented JSON.

    ``pd.Timestamp`` values are rendered as ISO-8601 strings before dumping.
    When no row matches, a notice is printed and ``None`` is returned. Both
    ``table`` and ``systemid`` are interpolated directly into the SQL text.
    """
    rows = pd.read_sql(
        f"SELECT * FROM {table} WHERE SystemId = '{systemid}';",
        engine,
    )

    if rows.empty:
        print("No data found for the provided SystemId.")
        return None

    # Build the printable record in one pass, swapping Timestamps for strings.
    record = {
        field: cell.isoformat() if isinstance(cell, pd.Timestamp) else cell
        for field, cell in rows.iloc[0].to_dict().items()
    }

    print(json.dumps(record, indent=4))

# Example usage: show the row for SystemId 9042 (`table` was set in an earlier cell).
get_specifiq_data_from_system_id(table, '9042', engine)

{
    "Id": 930,
    "GiDestinationId": "100037",
    "HotelName": "Bienville House",
    "SystemId": 9042,
    "Rating": 3.0,
    "City": "New Orleans",
    "Address1": "320 Decatur Street",
    "Address2": "New Orleans , Louisiana 70130",
    "ZipCode": "70130",
    "ImageUrl": "https://static.giinfotech.ae/medianew/thumbnail/9042/1091647.jpg",
    "Latitude": 29.953945,
    "Longitude": -90.065395,
    "CountryName": "United States",
    "CountryCode": "US",
    "StatusUpdate": "Done",
    "HotelInfo": "{\"name\": \"Bienville House\", \"rating\": 3, \"address\": {\"line1\": \"320 Decatur Street\", \"line2\": \"New Orleans , Louisiana 70130\", \"zipCode\": \"70130\", \"cityCode\": null, \"cityName\": \"New Orleans\", \"stateCode\": \"\", \"stateName\": null, \"countryCode\": \"US\", \"countryName\": \"United States\"}, \"checkIn\": null, \"contact\": {\"faxNo\": \"+15045256079\", \"phoneNo\": \"+15045292345\", \"website\": \"http://www.bienvillehouse.com/\"}, \"geocode\": {\"lat\": 2

In [38]:
def _hotel_created_fields(created_at):
    """Return the ``(created, timestamp)`` pair for a ``CreatedAt`` cell value."""
    if pd.isna(created_at):
        # Missing value (None / NaT): use the same placeholder as other fields.
        return "NULL", "NULL"
    if isinstance(created_at, pd.Timestamp):
        created_str = created_at.strftime("%Y-%m-%dT%H:%M:%S")
    else:
        # Anything else must already be a "%Y-%m-%dT%H:%M:%S" string.
        created_str = created_at
    # A naive datetime is interpreted in the local timezone by .timestamp().
    created_dt = datetime.strptime(created_str, "%Y-%m-%dT%H:%M:%S")
    return created_str, int(created_dt.timestamp())


def _hotel_amenity_entries(names):
    """Wrap each amenity name in the ``type``/``title``/``icon`` entry shape."""
    return [{"type": name, "title": name, "icon": "NULL"} for name in names or []]


def _hotel_code_name_placeholder():
    """Return a fresh one-item list holding an empty ``code``/``name`` entry."""
    return [{"code": "NULL", "name": "NULL"}]


def _hotel_address_fields(hotel_data, hotel_info):
    """Build the fields shared by ``address`` and ``address.local_lang``."""
    # These keys may be present with a JSON null value, so `.get(key, {})`
    # alone is not enough to guarantee a dict.
    geocode = hotel_info.get("geocode") or {}
    address = hotel_info.get("address") or {}
    return {
        "latitude": geocode.get("lat", hotel_data.get("Latitude", "NULL")),
        "longitude": geocode.get("lon", hotel_data.get("Longitude", "NULL")),
        "address_line_1": hotel_data.get("Address1", "NULL"),
        "address_line_2": hotel_data.get("Address2", "NULL"),
        "city": hotel_data.get("City", "NULL"),
        "state": address.get("stateName", "NULL"),
        "country": hotel_data.get("CountryName", "NULL"),
        "country_code": hotel_data.get("CountryCode", "NULL"),
        "postal_code": hotel_data.get("ZipCode", "NULL"),
        "full_address": f"{hotel_data.get('Address1', 'NULL')}, {hotel_data.get('Address2', 'NULL')}",
        "google_map_site_link": "NULL",
    }


def _build_hotel_record(hotel_data):
    """Map one table row (as a dict) onto the export record structure."""
    # HotelInfo holds a JSON document; a NULL/empty cell or a non-object
    # document is treated as "no extra info" instead of raising.
    raw_info = hotel_data.get("HotelInfo")
    hotel_info = json.loads(raw_info) if isinstance(raw_info, (str, bytes, bytearray)) and raw_info else {}
    if not isinstance(hotel_info, dict):
        hotel_info = {}

    created_str, time_stamp = _hotel_created_fields(hotel_data.get("CreatedAt"))
    contact = hotel_info.get("contact") or {}
    address_fields = _hotel_address_fields(hotel_data, hotel_info)
    hotel_name = hotel_info.get("name", hotel_data.get("HotelName", "NULL"))

    hotel_photo_data = [
        {"picture_id": "NULL", "title": "NULL", "url": url}
        for url in hotel_info.get("imageUrls", []) or []
    ]

    return {
        "created": created_str,
        "timestamp": time_stamp,
        "hotel_id": hotel_data.get("SystemId", "NULL"),
        "name": hotel_name,
        "name_local": hotel_name,
        "hotel_formerly_name": "NULL",
        "destination_code": hotel_data.get("GiDestinationId", "NULL"),
        "country_code": hotel_data.get("CountryCode", "NULL"),
        "brand_text": "NULL",
        "property_type": "NULL",
        "star_rating": hotel_info.get("rating", hotel_data.get("Rating", "NULL")),
        "chain": "NULL",
        "brand": "NULL",
        "logo": "NULL",
        "primary_photo": hotel_info.get("imageUrl", hotel_data.get("ImageUrl", "NULL")),
        "review_rating": {
            "source": "NULL",
            "number_of_reviews": "NULL",
            "rating_average": hotel_info.get("tripAdvisorRating", "NULL"),
            "popularity_score": "NULL",
        },
        "policies": {
            "checkin": {
                "begin_time": "NULL",
                "end_time": "NULL",
                "instructions": "NULL",
                "special_instructions": "NULL",
                "min_age": "NULL",
            },
            "checkout": {
                "time": "NULL",
            },
            "fees": {
                "optional": "NULL",
            },
            "know_before_you_go": "NULL",
            "pets": "NULL",
            "remark": "NULL",
            "child_and_extra_bed_policy": {
                "infant_age": "NULL",
                "children_age_from": "NULL",
                "children_age_to": "NULL",
                "children_stay_free": "NULL",
                "min_guest_age": "NULL",
            },
            "nationality_restrictions": "NULL",
        },
        "address": {
            **address_fields,
            # local_lang repeats the same fields; copy so the two dicts stay independent.
            "local_lang": dict(address_fields),
            "mapping": {
                "continent_id": "NULL",
                "country_id": hotel_data.get("CountryCode", "NULL"),
                "province_id": "NULL",
                "state_id": "NULL",
                "city_id": "NULL",
                "area_id": "NULL",
            },
        },
        "contacts": {
            "phone_numbers": [contact.get("phoneNo", "NULL")],
            "fax": contact.get("faxNo", "NULL"),
            "email_address": "NULL",
            "website": contact.get("website", hotel_data.get("Website", "NULL")),
        },
        "descriptions": [
            {
                "title": "NULL",
                "text": "NULL",
            }
        ],
        "room_type": {
            "room_id": "NULL",
            "title": "NULL",
            "title_lang": "NULL",
            "room_pic": "NULL",
            "description": "NULL",
            "max_allowed": {
                "total": "NULL",
                "adults": "NULL",
                "children": "NULL",
                "infant": "n/a",
            },
            "no_of_room": "n/a",
            "room_size": "NULL",
            "bed_type": [
                {
                    "description": "NULL",
                    "configuration": [
                        {
                            "quantity": "NULL",
                            "size": "NULL",
                            "type": "NULL",
                        }
                    ],
                    "max_extrabeds": "n/a",
                }
            ],
            "shared_bathroom": "n/a",
        },
        "spoken_languages": {
            "type": "NULL",
            "title": "NULL",
            "icon": "NULL",
        },
        "amenities": _hotel_amenity_entries(hotel_info.get("masterRoomAmenities", [])),
        "facilities": _hotel_amenity_entries(hotel_info.get("masterHotelAmenities", [])),
        "hotel_photo": hotel_photo_data,
        "point_of_interests": _hotel_code_name_placeholder(),
        "nearest_airports": _hotel_code_name_placeholder(),
        "train_stations": _hotel_code_name_placeholder(),
        "connected_locations": _hotel_code_name_placeholder(),
        "stadiums": _hotel_code_name_placeholder(),
    }


def get_specifiq_data_from_system_id(table, systemid, engine):
    """Fetch the row for ``systemid`` and convert it into the export record.

    Note: earlier cells in this notebook define a function with the same name;
    whichever definition ran last in the kernel is the one that gets called.

    Args:
        table: Table name; interpolated directly into the SQL text.
        systemid: Value compared against the ``SystemId`` column; also
            interpolated into the SQL text, so only pass trusted values.
        engine: Connectable handed to ``pd.read_sql``.

    Returns:
        dict | None: The record built from the first matching row, with the
        string "NULL" used as the placeholder for fields that have no source.
        ``None`` (after printing a notice) when no row matches.

    Raises:
        ValueError: If ``CreatedAt`` is a string that is not in
            ``%Y-%m-%dT%H:%M:%S`` format, or ``HotelInfo`` is not valid JSON.
    """
    query = f"SELECT * FROM {table} WHERE SystemId = '{systemid}';"
    df = pd.read_sql(query, engine)

    if df.empty:
        print("No data found for the provided SystemId.")
        return None

    # Only the first matching row is used.
    return _build_hotel_record(df.iloc[0].to_dict())


    # with open(output_file, "w") as f:
    #     json.dump(specific_data, f, indent=4)

    # print(f"Data for SystemId {systemid} saved to {output_file}")

# Fetch and save data for a specific SystemId
# table = 'hotel_info_all'
# systemId = '1081355'
# data = get_specifiq_data_from_system_id(table=table, systemid=systemId, engine=engine)
# print(data)


In [None]:
def save_json_files_follow_systemId(folder_path):
    """Write one ``<SystemId>.json`` file per selected hotel into ``folder_path``.

    The ids come from ``get_system_id_list`` on the ``hotel_info_all`` table and
    each record from ``get_specifiq_data_from_system_id``; both use the
    notebook-level ``engine``.

    Args:
        folder_path: Target directory; created (with parents) when missing.

    Returns:
        None. Progress is reported with one printed line per id.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_path, exist_ok=True)

    table = 'hotel_info_all'
    column = 'SystemId'

    # Guard against a failed lookup yielding None instead of a list.
    systemid_list = get_system_id_list(table, column, engine) or []

    for systemid in systemid_list:
        file_name = f"{systemid}.json"
        file_path = os.path.join(folder_path, file_name)

        data_dict = get_specifiq_data_from_system_id(table, systemid, engine)
        if data_dict is None:
            # No record was produced for this id; do not leave behind a file
            # whose only content is the JSON literal null.
            print(f"Skip {file_name}: no data returned")
            continue

        with open(file_path, "w", encoding="utf-8") as json_file:
            json.dump(data_dict, json_file, indent=4)

        print(f"Save {file_name} in {folder_path}")



# Directory that receives the exported per-hotel JSON files.
folder_path = './gill_hotel_json_files/AE'

save_json_files_follow_systemId(folder_path)

Save 745474.json in ./gill_hotel_json_files/AE
Save 1269765.json in ./gill_hotel_json_files/AE
Save 1138704.json in ./gill_hotel_json_files/AE
Save 1138705.json in ./gill_hotel_json_files/AE
Save 1138706.json in ./gill_hotel_json_files/AE
Save 1376276.json in ./gill_hotel_json_files/AE
Save 1384469.json in ./gill_hotel_json_files/AE
Save 417814.json in ./gill_hotel_json_files/AE
Save 753687.json in ./gill_hotel_json_files/AE
Save 122905.json in ./gill_hotel_json_files/AE
Save 1081370.json in ./gill_hotel_json_files/AE
Save 1384479.json in ./gill_hotel_json_files/AE
Save 139299.json in ./gill_hotel_json_files/AE
Save 139304.json in ./gill_hotel_json_files/AE
Save 516142.json in ./gill_hotel_json_files/AE
Save 1294384.json in ./gill_hotel_json_files/AE
Save 1187902.json in ./gill_hotel_json_files/AE
Save 434245.json in ./gill_hotel_json_files/AE
Save 131142.json in ./gill_hotel_json_files/AE
Save 16457.json in ./gill_hotel_json_files/AE
Save 139337.json in ./gill_hotel_json_files/AE
Save