## Database all function are checked here

#### Add module for database connections

In [None]:
from sqlalchemy import create_engine, text, inspect
from dotenv import load_dotenv
from datetime import datetime
import pandas as pd
import time
import os


### Connect enviroment using dotenv.

In [None]:
load_dotenv()

db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASSWORD')
db_host = os.getenv('DB_HOST')
db_name = os.getenv('DB_NAME')

### Create engine

In [None]:
DATABASE_URL = f"mysql+pymysql://{db_user}:{db_pass}@{db_host}/{db_name}"
engine = create_engine(DATABASE_URL)

### Database connection test

In [None]:
try:
    with engine.connect() as connection:
        result = connection.execute(text("SELECT DATABASE()"))
        db_name = result.fetchone()[0]
        print(f"Connected to database: {db_name}")
except Exception as e:
    print(f"Error: {e}")

### Print all database table

In [None]:
try:
    inspactor = inspect(engine)
    tables = inspactor.get_table_names()
    if tables:
        print(f"Table in the database: {db_name}")
        for table in tables:
            print(table)
except Exception as e:
    print(f"Error from database. {e}")
    

### Get all data from a table for test.

In [None]:
table_name = "innova_hotels_main"

df = pd.read_sql(table_name, engine)

# print(df)

### Get header in the data table using pandes only

In [None]:
table_name = "innova_hotels_main"

df = pd.read_sql(table_name, engine)

header = df.columns.tolist()

print(header)

### Get header in the data table using SQL query

In [None]:
table = "innova_hotels_main"
df = pd.read_sql(table, engine)

query = f"SELECT * FROM {table} LIMIT 0;"

header = pd.read_sql(query, engine)

header_with_dataframe = header.columns.to_list()
# print(header)
print(header_with_dataframe)

### Get number one row in my data table without dictionary

In [None]:
table = "innova_hotels_main"
df = pd.read_sql(table, engine)

number_one_row = df.iloc[0]
print(f"Number one row:\n {number_one_row}")

### Get number one row in my data table with dictionary

In [None]:
table = "innova_hotels_main"
df = pd.read_sql(table, engine)

one_number_row = df.iloc[0]

row_info_with_dict = one_number_row.to_dict()

print(row_info_with_dict)

### Get selectted key value.

In [None]:
hotel_id = row_info_with_dict.get("HotelId")
supplierCode = row_info_with_dict.get("SupplierCode")
hotel_name = row_info_with_dict.get("HotelName")
amenities = row_info_with_dict.get("Amenities_1")

print(f"Holte Id: {hotel_id}")
print(f"Supplier Code: {supplierCode}")
print(f"Hotel Name: {hotel_name}")

# print(f"Hotel Amenities: {amenities}")

### Split data in a sell and separate value using strip.

In [None]:
amenities_1 = row_info_with_dict.get("Amenities_1")

split_data = amenities_1.split(",")

print("Lenth data in sell:", len(split_data))

print("\n")

for i in range(len(split_data)):
    print(split_data[i].strip())
# print(split_data)

# For ratehawk with localdb.

In [None]:
import pandas as pd
from sqlalchemy import create_engine, MetaData, Table, Column, Float, Boolean, String
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import text
import json

In [None]:
load_dotenv()

db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASSWORD')
db_host = os.getenv('DB_HOST')
db_name = os.getenv('DB_NAME')

In [None]:
DATABASE_URL_SERVER = f"mysql+pymysql://{db_user}:{db_pass}@{db_host}/{db_name}"
server_engine = create_engine(DATABASE_URL_SERVER)
Session_1 = sessionmaker(bind=server_engine)
session_1 = Session_1()

DATABASE_URL_LOCAL = "mysql+pymysql://root:@localhost/csvdata01_02102024"
local_engine = create_engine(DATABASE_URL_LOCAL)
Session_2 = sessionmaker(bind=local_engine)
session_2 = Session_2()

In [None]:
DATABASE_URL_LOCAL = "mysql+pymysql://root:@localhost/csvdata01_02102024"
local_engine = create_engine(DATABASE_URL_LOCAL)
Session_2 = sessionmaker(bind=local_engine)
session_2 = Session_2()

try:
    with local_engine.connect() as connection:
        result = connection.execute(text("SELECT DATABASE()"))
        db_name = result.fetchone()[0]
        print(f"Connected to database: {db_name}")
except Exception as e:
    print(f"Error: {e}")

In [None]:
try:
    inspactor = inspect(local_engine)
    tables = inspactor.get_table_names()
    if tables:
        print(f"Table in the database: {db_name}")
        for table in tables:
            print(table)
except Exception as e:
    print(f"Error from database. {e}")

In [None]:
table = "ratehawk"

query = f"SELECT * FROM {table} LIMIT 4;"

df = pd.read_sql(query, local_engine)

one_number_row = df.iloc[0]

row_info_with_dict = one_number_row.to_dict()

# print(row_info_with_dict)

In [22]:
row_dict = df.astype(str).to_dict(orient="records")[0] 
print(row_dict)

{'my_id': '3199212', 'address': 'Kampong Chheuteal Village, Prasat Sambor, Prasat Sambor', 'amenity_groups': "[{'amenities': ['24-hour reception', 'Smoke-free property', 'Garden'], 'non_free_amenities': [], 'group_name': 'General'}, {'amenities': ['Non-smoking rooms'], 'non_free_amenities': [], 'group_name': 'Rooms'}, {'amenities': ['Laundry'], 'non_free_amenities': ['Laundry'], 'group_name': 'Services and amenities'}, {'amenities': ['English'], 'non_free_amenities': [], 'group_name': 'Languages Spoken'}, {'amenities': ['No parking'], 'non_free_amenities': [], 'group_name': 'Parking'}, {'amenities': ['Beach/pool towels'], 'non_free_amenities': [], 'group_name': 'Pool and beach'}, {'amenities': ['Pets Not Allowed'], 'non_free_amenities': [], 'group_name': 'Pets'}]", 'check_in_time': '14:00:00', 'check_out_time': '12:00:00', 'description_struct': "[{'paragraphs': ['Want to save money while travelling? Itâ\\x80\\x99s easy: hotel Â«Isanborei Homestay 2Â» is located in Prasat Sambor. This h

In [35]:
import ast

address = row_dict.get("address")
amenities = row_dict.get("amenity_groups")

# print(amenities)

amenities_list = ast.literal_eval(amenities)

general_amenities = next(item for item in amenities_list if item.get('group_name') == 'General')

# Print the result
print(general_amenities.get("amenities")[0])


amenities_json_format = json.dumps(amenities, indent=4)

# print(amenities_json_format)
# print("Address: ", address)

24-hour reception


In [None]:
json_value = json.dumps(row_dict, indent=4)

print(json_value)

In [37]:
from sqlalchemy import create_engine, Table, MetaData, text
from sqlalchemy.orm import sessionmaker
import json
import os

# Database connection setup
db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASSWORD')
db_name = os.getenv('DB_NAME')

DATABASE_URL_SERVER = f"mysql+pymysql://{db_user}:{db_pass}@{db_host}/{db_name}"
server_engine = create_engine(DATABASE_URL_SERVER)
Session_1 = sessionmaker(bind=server_engine)
session_1 = Session_1()

DATABASE_URL_LOCAL = "mysql+pymysql://root:@localhost/csvdata01_02102024"
local_engine = create_engine(DATABASE_URL_LOCAL)
Session_2 = sessionmaker(bind=local_engine)
session_2 = Session_2()

# Create separate MetaData objects for each engine
metadata_local = MetaData()
metadata_server = MetaData()

# Reflect tables to the correct engines
metadata_local.reflect(bind=local_engine)
metadata_server.reflect(bind=server_engine)

ratehawk = Table('ratehawk', metadata_local, autoload_with=local_engine)
innova_hotels_main = Table('innova_hotels_main', metadata_server, autoload_with=server_engine)

# Helper function to escape single quotes
def escape_single_quotes(value):
    if isinstance(value, str):
        return value.replace("'", "''")
    return value


# Mapping function to transfer only the first 10 rows
def transfer_first_10_rows():
    try:
        with session_1.begin(), session_2.begin():
            # Fetch the first 10 rows from the ratehawk table
            results = session_1.query(ratehawk).limit(10).all()

            print(result)

        #     for row in results:
        #         # Parse JSON fields from the ratehawk table
        #         data = {
        #             "name": row.name,
        #             "systemId": row.hid,
        #             "address": row.address,
        #             "geocode.lat": row.latitude,
        #             "geocode.lon": row.longitude,
        #             "rating": row.star_rating,
        #             "contact.phoneNo": row.phone,
        #             "imageUrls": json.loads(row.images) if row.images else [],
        #         }

        #         # Map the primary image URL if available
        #         if data["imageUrls"]:
        #             data["PrimaryPhoto"] = data["imageUrls"][0]
        #         else:
        #             data["PrimaryPhoto"] = None

        #         # Map amenities to specific columns
        #         amenities = json.loads(row.amenity_groups) if row.amenity_groups else []
        #         for i in range(1, 6):
        #             column_name = f"Amenities_{i}"
        #             if amenities and i <= len(amenities):
        #                 data[column_name] = amenities[i - 1]['amenities'][0]
        #             else:
        #                 data[column_name] = None

        #         data['SupplierCode'] = 'ratehawk'
        #         data = {k: escape_single_quotes(v) for k, v in data.items()}

        #         # Set missing columns to NULL
        #         for col in innova_hotels_main.columns:
        #             if col.name not in data:
        #                 data[col.name] = None

        #         # Prepare SQL query for upsert
        #         columns = ', '.join(data.keys())
        #         values = ', '.join([f"'{v}'" if v is not None else 'NULL' for v in data.values()])
        #         update_clause = ', '.join([f"{col} = VALUES({col})" for col in data.keys()])

        #         sql = f"""
        #             INSERT INTO innova_hotels_main ({columns})
        #             VALUES ({values})
        #             ON DUPLICATE KEY UPDATE {update_clause}
        #         """
        #         session_2.execute(text(sql))

        # print("First 10 rows updated successfully in innova_hotels_main.")
    except Exception as e:
        print(f"An error occurred: {e}")
        session_1.rollback()
        session_2.rollback()

# Execute the transfer
transfer_first_10_rows()


An error occurred: (pymysql.err.ProgrammingError) (1146, "Table 'itt_master_contents.ratehawk' doesn't exist")
[SQL: SELECT ratehawk.my_id AS ratehawk_my_id, ratehawk.address AS ratehawk_address, ratehawk.amenity_groups AS ratehawk_amenity_groups, ratehawk.check_in_time AS ratehawk_check_in_time, ratehawk.check_out_time AS ratehawk_check_out_time, ratehawk.description_struct AS ratehawk_description_struct, ratehawk.id AS ratehawk_id, ratehawk.hid AS ratehawk_hid, ratehawk.images AS ratehawk_images, ratehawk.images_ext AS ratehawk_images_ext, ratehawk.kind AS ratehawk_kind, ratehawk.latitude AS ratehawk_latitude, ratehawk.longitude AS ratehawk_longitude, ratehawk.name AS ratehawk_name, ratehawk.phone AS ratehawk_phone, ratehawk.policy_struct AS ratehawk_policy_struct, ratehawk.postal_code AS ratehawk_postal_code, ratehawk.room_groups AS ratehawk_room_groups, ratehawk.region AS ratehawk_region, ratehawk.star_rating AS ratehawk_star_rating, ratehawk.email AS ratehawk_email, ratehawk.serp_

In [None]:
from sqlalchemy import create_engine, Table, MetaData, text
from sqlalchemy.orm import sessionmaker
import json
import os

# Database connection setup
db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASSWORD')
db_name = os.getenv('DB_NAME')

DATABASE_URL_SERVER = f"mysql+pymysql://{db_user}:{db_pass}@{db_host}/{db_name}"
server_engine = create_engine(DATABASE_URL_SERVER)
Session_1 = sessionmaker(bind=server_engine)
session_1 = Session_1()

DATABASE_URL_LOCAL = "mysql+pymysql://root:@localhost/csvdata01_02102024"
local_engine = create_engine(DATABASE_URL_LOCAL)
Session_2 = sessionmaker(bind=local_engine)
session_2 = Session_2()

# Create separate MetaData objects for each engine
metadata_local = MetaData()
metadata_server = MetaData()

# Reflect tables to the correct engines
metadata_local.reflect(bind=local_engine)
metadata_server.reflect(bind=server_engine)

ratehawk = Table('ratehawk', metadata_local, autoload_with=local_engine)
innova_hotels_main = Table('innova_hotels_main', metadata_server, autoload_with=server_engine)

# Columns to be included in the insert
columns_to_insert = [
    'Id', 'IttId', 'VervotechId', 'SupplierCode', 'HotelId', 'DestinationId', 'City', 'CityCode', 'State', 
    'StateCode', 'PostCode', 'Country', 'CountryCode', 'HotelType', 'HotelName', 'Latitude', 'Longitude', 
    'PrimaryPhoto', 'AddressLine1', 'AddressLine2', 'HotelReview', 'Website', 'Email', 'ContactNumber', 
    'HotelStar', 'RoomAmenities', 'Amenities_1', 'Amenities_2', 'Amenities_3', 'Amenities_4'
]

# Helper function to escape single quotes
def escape_single_quotes(value):
    if isinstance(value, str):
        return value.replace("'", "''")
    return value

# Mapping function to transfer only the first 10 rows
def transfer_first_10_rows():
    try:
        with session_1.begin(), session_2.begin():
            # Fetch the first 10 rows from the ratehawk table in the local database
            results = session_2.query(ratehawk).limit(10).all()

            for row in results:
                try:
                    # Parse JSON fields from the ratehawk table
                    images = json.loads(row.images) if row.images else []
                except json.JSONDecodeError:
                    print(f"Invalid JSON in 'images' for row ID: {row.my_id}. Skipping...")
                    images = []

                try:
                    amenities = json.loads(row.amenity_groups) if row.amenity_groups else []
                except json.JSONDecodeError:
                    print(f"Invalid JSON in 'amenity_groups' for row ID: {row.my_id}. Skipping...")
                    amenities = []

                data = {
                    
                }

                data = {k: escape_single_quotes(v) if v is not None else None for k, v in data.items()}

                # Prepare SQL query for upsert
                columns = ', '.join(data.keys())
                values = ', '.join([f"'{v}'" if v is not None else 'NULL' for v in data.values()])
                update_clause = ', '.join([f"{col} = VALUES({col})" for col in data.keys()])

                sql = f"""
                    INSERT INTO innova_hotels_main ({columns})
                    VALUES ({values})
                    ON DUPLICATE KEY UPDATE {update_clause}
                """
                session_1.execute(text(sql))

        print("First 10 rows updated successfully in innova_hotels_main.")
    except Exception as e:
        print(f"An error occurred: {e}")
        session_1.rollback()
        session_2.rollback()


# Execute the transfer
transfer_first_10_rows()


In [65]:
from sqlalchemy import create_engine, Table, MetaData, text
from sqlalchemy.orm import sessionmaker, load_only
import json
import os

# Database connection setup
db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASSWORD')
db_name = os.getenv('DB_NAME')

DATABASE_URL_SERVER = f"mysql+pymysql://{db_user}:{db_pass}@{db_host}/{db_name}"
server_engine = create_engine(DATABASE_URL_SERVER)
Session_1 = sessionmaker(bind=server_engine)
session_1 = Session_1()

DATABASE_URL_LOCAL = "mysql+pymysql://root:@localhost/csvdata01_02102024"
local_engine = create_engine(DATABASE_URL_LOCAL)
Session_2 = sessionmaker(bind=local_engine)
session_2 = Session_2()

# Create separate MetaData objects for each engine
metadata_local = MetaData()
metadata_server = MetaData()

# Reflect tables to the correct engines
metadata_local.reflect(bind=local_engine)
metadata_server.reflect(bind=server_engine)

ratehawk = Table('ratehawk', metadata_local, autoload_with=local_engine)
innova_hotels_main = Table('innova_hotels_main', metadata_server, autoload_with=server_engine)

# Helper function to escape single quotes
def escape_single_quotes(value):
    if isinstance(value, str):
        return value.replace("'", "''")
    return value

# Mapping function to transfer only the first 10 rows
def transfer_first_10_rows():
    try:
        with session_1.begin(), session_2.begin():
            query = session_2.query(ratehawk).limit(1).statement
            df = pd.read_sql(query, local_engine)
            row_dict = df.astype(str).to_dict(orient="records")[0]
            # print(row_dict)


            keys_to_extract =  ["address", "amenity_groups", "hid", "images", "kind", "latitude", "longitude", "name", "phone", "postal_code", "region", "star_rating",
                               "email", ]
            filtered_row_dict = {key: row_dict.get(key, None) for key in keys_to_extract}
    
            print(filtered_row_dict)


        
    except Exception as e:
        print(f"An error occurred: {e}")
        session_1.rollback()
        session_2.rollback()


# Execute the transfer
transfer_first_10_rows()


{'address': 'Kampong Chheuteal Village, Prasat Sambor, Prasat Sambor', 'amenity_groups': "[{'amenities': ['24-hour reception', 'Smoke-free property', 'Garden'], 'non_free_amenities': [], 'group_name': 'General'}, {'amenities': ['Non-smoking rooms'], 'non_free_amenities': [], 'group_name': 'Rooms'}, {'amenities': ['Laundry'], 'non_free_amenities': ['Laundry'], 'group_name': 'Services and amenities'}, {'amenities': ['English'], 'non_free_amenities': [], 'group_name': 'Languages Spoken'}, {'amenities': ['No parking'], 'non_free_amenities': [], 'group_name': 'Parking'}, {'amenities': ['Beach/pool towels'], 'non_free_amenities': [], 'group_name': 'Pool and beach'}, {'amenities': ['Pets Not Allowed'], 'non_free_amenities': [], 'group_name': 'Pets'}]", 'hid': '6291584', 'images': "['https://cdn.worldota.net/t/{size}/content/b9/e4/b9e4799bb77f46b628d9a55a9c67d22205e32c6d.jpeg', 'https://cdn.worldota.net/t/{size}/content/5f/d9/5fd9a14ac6cd2602a5185d638be94c0395e111b9.jpeg', 'https://cdn.worldot

In [86]:
from sqlalchemy import create_engine, Table, MetaData, text
from sqlalchemy.orm import sessionmaker, load_only
import json
import os

# Database connection setup
db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASSWORD')
db_name = os.getenv('DB_NAME')

DATABASE_URL_SERVER = f"mysql+pymysql://{db_user}:{db_pass}@{db_host}/{db_name}"
server_engine = create_engine(DATABASE_URL_SERVER)
Session_1 = sessionmaker(bind=server_engine)
session_1 = Session_1()

DATABASE_URL_LOCAL = "mysql+pymysql://root:@localhost/csvdata01_02102024"
local_engine = create_engine(DATABASE_URL_LOCAL)
Session_2 = sessionmaker(bind=local_engine)
session_2 = Session_2()

# Create separate MetaData objects for each engine
metadata_local = MetaData()
metadata_server = MetaData()

# Reflect tables to the correct engines
metadata_local.reflect(bind=local_engine)
metadata_server.reflect(bind=server_engine)

ratehawk = Table('ratehawk', metadata_local, autoload_with=local_engine)
innova_hotels_main = Table('innova_hotels_main', metadata_server, autoload_with=server_engine)

# Helper function to escape single quotes
def escape_single_quotes(value):
    if isinstance(value, str):
        return value.replace("'", "''")
    return value

# Mapping function to transfer only the first 10 rows
def get_data_dict():
    try:
        with session_1.begin(), session_2.begin():
            query = session_2.query(ratehawk).limit(10).statement
            df = pd.read_sql(query, local_engine)
            row_dict = df.astype(str).to_dict(orient="records")[0]
            # print(row_dict)


            keys_to_extract =  ["address", "amenity_groups", "hid", "images", "kind", "latitude", "longitude", "name", "phone", "postal_code", "region", "star_rating",
                               "email", ]
            filtered_row_dict = {key: row_dict.get(key, None) for key in keys_to_extract}
    
            print(filtered_row_dict)



        
        print("First 10 rows updated successfully in innova_hotels_main.")
    except Exception as e:
        print(f"An error occurred: {e}")
        session_1.rollback()
        session_2.rollback()


# Execute the transfer
get_data_dict()


{'address': 'Kampong Chheuteal Village, Prasat Sambor, Prasat Sambor', 'amenity_groups': "[{'amenities': ['24-hour reception', 'Smoke-free property', 'Garden'], 'non_free_amenities': [], 'group_name': 'General'}, {'amenities': ['Non-smoking rooms'], 'non_free_amenities': [], 'group_name': 'Rooms'}, {'amenities': ['Laundry'], 'non_free_amenities': ['Laundry'], 'group_name': 'Services and amenities'}, {'amenities': ['English'], 'non_free_amenities': [], 'group_name': 'Languages Spoken'}, {'amenities': ['No parking'], 'non_free_amenities': [], 'group_name': 'Parking'}, {'amenities': ['Beach/pool towels'], 'non_free_amenities': [], 'group_name': 'Pool and beach'}, {'amenities': ['Pets Not Allowed'], 'non_free_amenities': [], 'group_name': 'Pets'}]", 'hid': '6291584', 'images': "['https://cdn.worldota.net/t/{size}/content/b9/e4/b9e4799bb77f46b628d9a55a9c67d22205e32c6d.jpeg', 'https://cdn.worldota.net/t/{size}/content/5f/d9/5fd9a14ac6cd2602a5185d638be94c0395e111b9.jpeg', 'https://cdn.worldot

### Input RateHawk hotel data in innova_hotels_main table here only insert data 10 rows.

In [84]:
from sqlalchemy import create_engine, Table, MetaData, text, insert
from sqlalchemy.orm import sessionmaker, load_only
import pandas as pd
import json
import os
import ast

# Database connection setup
db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASSWORD')
db_name = os.getenv('DB_NAME')

DATABASE_URL_SERVER = f"mysql+pymysql://{db_user}:{db_pass}@{db_host}/{db_name}"
server_engine = create_engine(DATABASE_URL_SERVER)
Session_1 = sessionmaker(bind=server_engine)
session_1 = Session_1()

DATABASE_URL_LOCAL = "mysql+pymysql://root:@localhost/csvdata01_02102024"
local_engine = create_engine(DATABASE_URL_LOCAL)
Session_2 = sessionmaker(bind=local_engine)
session_2 = Session_2()

# Create separate MetaData objects for each engine
metadata_local = MetaData()
metadata_server = MetaData()

# Reflect tables to the correct engines
metadata_local.reflect(bind=local_engine)
metadata_server.reflect(bind=server_engine)

ratehawk = Table('ratehawk', metadata_local, autoload_with=local_engine)
innova_hotels_main = Table('innova_hotels_main', metadata_server, autoload_with=server_engine)

# Helper function to escape single quotes
def escape_single_quotes(value):
    if isinstance(value, str):
        return value.replace("'", "''")
    return value



def transfer_first_10_rows():
    try:
        with session_1.begin(), session_2.begin():
            query = session_2.query(ratehawk).limit(10).statement
            df = pd.read_sql(query, local_engine)
            rows = df.astype(str).to_dict(orient="records")

            for row_dict in rows:
                keys_to_extract = [
                    "address", "hid", "images", "kind", "latitude", "longitude", "name", 
                    "phone", "postal_code", "region", "star_rating", "email", "amenity_groups"
                ]
                filtered_row_dict = {key: row_dict.get(key, None) for key in keys_to_extract}

                try:
                    region = ast.literal_eval(filtered_row_dict.get("region", "{}"))
                except (ValueError, SyntaxError):
                    region = {}

                try:
                    amenity_groups = ast.literal_eval(filtered_row_dict.get("amenity_groups", "[]"))
                except (ValueError, SyntaxError):
                    amenity_groups = []

                try:
                    images = ast.literal_eval(filtered_row_dict.get("images", "[]"))
                except (ValueError, SyntaxError):
                    images = []

                if images:
                    images = [image_url.replace("t/{size}", "t/x500") for image_url in images]


                data = {
                    'HotelId': filtered_row_dict.get("hid", None),
                    'City': region.get("name", None),
                    'Country': region.get("country_name", None),
                    'CountryCode': region.get("country_code", None),
                    'PostCode': filtered_row_dict.get("postal_code", None),
                    'HotelType': filtered_row_dict.get("kind"),
                    'HotelName': filtered_row_dict.get("name"),
                    'Latitude': filtered_row_dict.get("latitude"),
                    'Longitude': filtered_row_dict.get("longitude"),
                    'PrimaryPhoto': images[0] if images else None,
                    'AddressLine1': filtered_row_dict.get("address"),
                    'Email': filtered_row_dict.get("email"),
                    'ContactNumber': filtered_row_dict.get("phone"),
                    'HotelStar': filtered_row_dict.get("star_rating"),
                    'Amenities_1': None,
                    'Amenities_2': None,
                    'Amenities_3': None,
                    'Amenities_4': None,
                    'Amenities_5': None,
                    'SupplierCode': 'ratehawk'
                }

                if amenity_groups:
                    first_group = amenity_groups[0].get("amenities", [])
                    data['Amenities_1'] = first_group[0] if len(first_group) > 0 else None
                    data['Amenities_2'] = first_group[1] if len(first_group) > 1 else None
                    data['Amenities_3'] = first_group[2] if len(first_group) > 2 else None
                    data['Amenities_4'] = first_group[3] if len(first_group) > 3 else None
                    data['Amenities_5'] = first_group[4] if len(first_group) > 4 else None
                else:
                    data['Amenities_1'] = None
                    data['Amenities_2'] = None
                    data['Amenities_3'] = None
                    data['Amenities_4'] = None
                    data['Amenities_5'] = None

                stmt = insert(innova_hotels_main).values(data)
                session_1.execute(stmt)

        print("First 10 rows updated successfully in innova_hotels_main.")
    except Exception as e:
        print(f"An error occurred: {e}")
        session_1.rollback()
        session_2.rollback()

# Execute the transfer
transfer_first_10_rows()


First 10 rows updated successfully in innova_hotels_main.


### Input ratehawk hotel all data using batch size

In [None]:
from sqlalchemy import create_engine, Table, MetaData, insert
from sqlalchemy.orm import sessionmaker
import pandas as pd
import json
import os
import ast

# Database connection setup
db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASSWORD')
db_name = os.getenv('DB_NAME')

DATABASE_URL_SERVER = f"mysql+pymysql://{db_user}:{db_pass}@{db_host}/{db_name}"
server_engine = create_engine(DATABASE_URL_SERVER)
Session_1 = sessionmaker(bind=server_engine)
session_1 = Session_1()

DATABASE_URL_LOCAL = "mysql+pymysql://root:@localhost/csvdata01_02102024"
local_engine = create_engine(DATABASE_URL_LOCAL)
Session_2 = sessionmaker(bind=local_engine)
session_2 = Session_2()

# Create separate MetaData objects for each engine
metadata_local = MetaData()
metadata_server = MetaData()

# Reflect tables to the correct engines
metadata_local.reflect(bind=local_engine)
metadata_server.reflect(bind=server_engine)

ratehawk = Table('ratehawk', metadata_local, autoload_with=local_engine)
innova_hotels_main = Table('innova_hotels_main', metadata_server, autoload_with=server_engine)

# Helper function to escape single quotes
def escape_single_quotes(value):
    if isinstance(value, str):
        return value.replace("'", "''")
    return value

def transfer_all_rows():
    try:
        with session_1.begin(), session_2.begin():
            # Calculate total rows to process
            total_rows_query = session_2.query(ratehawk).count()
            batch_size = 10000
            total_batches = (total_rows_query // batch_size) + (1 if total_rows_query % batch_size > 0 else 0)

            for batch_number in range(total_batches):
                offset = batch_number * batch_size
                query = session_2.query(ratehawk).offset(offset).limit(batch_size).statement
                df = pd.read_sql(query, local_engine)
                rows = df.astype(str).to_dict(orient="records")

                for row_dict in rows:
                    keys_to_extract = [
                        "address", "hid", "images", "kind", "latitude", "longitude", "name", 
                        "phone", "postal_code", "region", "star_rating", "email", "amenity_groups"
                    ]
                    filtered_row_dict = {key: row_dict.get(key, None) for key in keys_to_extract}

                    try:
                        region = ast.literal_eval(filtered_row_dict.get("region", "{}"))
                    except (ValueError, SyntaxError):
                        region = {}

                    try:
                        amenity_groups = ast.literal_eval(filtered_row_dict.get("amenity_groups", "[]"))
                    except (ValueError, SyntaxError):
                        amenity_groups = []

                    try:
                        images = ast.literal_eval(filtered_row_dict.get("images", "[]"))
                    except (ValueError, SyntaxError):
                        images = []

                    if images:
                        images = [image_url.replace("t/{size}", "t/x500") for image_url in images]

                    data = {
                        'HotelId': filtered_row_dict.get("hid", None),
                        'City': region.get("name", None),
                        'Country': region.get("country_name", None),
                        'CountryCode': region.get("country_code", None),
                        'PostCode': filtered_row_dict.get("postal_code", None),
                        'HotelType': filtered_row_dict.get("kind"),
                        'HotelName': filtered_row_dict.get("name"),
                        'Latitude': filtered_row_dict.get("latitude"),
                        'Longitude': filtered_row_dict.get("longitude"),
                        'PrimaryPhoto': images[0] if images else None,
                        'AddressLine1': filtered_row_dict.get("address"),
                        'Email': filtered_row_dict.get("email"),
                        'ContactNumber': filtered_row_dict.get("phone"),
                        'HotelStar': filtered_row_dict.get("star_rating"),
                        'Amenities_1': None,
                        'Amenities_2': None,
                        'Amenities_3': None,
                        'Amenities_4': None,
                        'Amenities_5': None,
                        'SupplierCode': 'ratehawk'
                    }

                    if amenity_groups:
                        first_group = amenity_groups[0].get("amenities", [])
                        data['Amenities_1'] = first_group[0] if len(first_group) > 0 else None
                        data['Amenities_2'] = first_group[1] if len(first_group) > 1 else None
                        data['Amenities_3'] = first_group[2] if len(first_group) > 2 else None
                        data['Amenities_4'] = first_group[3] if len(first_group) > 3 else None
                        data['Amenities_5'] = first_group[4] if len(first_group) > 4 else None
                    else:
                        data['Amenities_1'] = None
                        data['Amenities_2'] = None
                        data['Amenities_3'] = None
                        data['Amenities_4'] = None
                        data['Amenities_5'] = None

                    stmt = insert(innova_hotels_main).values(data)
                    session_1.execute(stmt)

                print(f"Batch {batch_number + 1} of {total_batches} processed successfully.")

        print("All rows updated successfully in innova_hotels_main.")
    except Exception as e:
        print(f"An error occurred: {e}")
        session_1.rollback()
        session_2.rollback()

# Execute the transfer
transfer_all_rows()
