#### Check the Python version

In [1]:
import sys

# Record the interpreter build next to the outputs produced by this notebook.
interpreter_version = sys.version
print(interpreter_version)

3.12.0 (tags/v3.12.0:0fb18b0, Oct  2 2023, 13:03:39) [MSC v.1935 64 bit (AMD64)]


## Import libraries and connect to the database using credentials from the environment.

In [2]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text, inspect
from sqlalchemy.exc import SQLAlchemyError

load_dotenv()

# Database connection details, read from the environment (.env is loaded above).
db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_pass = os.getenv('DB_PASSWORD')
db_name = os.getenv('DB_NAME')

# Fail here with a readable message instead of building a URL that contains
# the literal text "None" for every variable that is not set.
_missing_db_settings = [
    env_key
    for env_key, env_value in (
        ('DB_HOST', db_host),
        ('DB_USER', db_user),
        ('DB_PASSWORD', db_pass),
        ('DB_NAME', db_name),
    )
    if env_value is None
]
if _missing_db_settings:
    raise RuntimeError(
        "Missing database settings in the environment/.env file: "
        + ", ".join(_missing_db_settings)
    )

# Percent-encode user name and password: characters such as '@', ':' or '/'
# would otherwise be read as URL delimiters and corrupt the connection string.
DATABASE_URL = (
    f"mysql+pymysql://{quote(db_user, safe='')}:{quote(db_pass, safe='')}"
    f"@{db_host}/{db_name}"
)
engine = create_engine(DATABASE_URL)

### Database connection test query

In [4]:
# Connectivity smoke test: ask the server which schema this session is using.
# Note that db_name is overwritten with the value reported by the server.
try:
    with engine.connect() as connection:
        result = connection.execute(text("SELECT DATABASE()"))
        current_row = result.fetchone()
        db_name = current_row[0]
        print(f"Connected to database: {db_name}")
except Exception as error:
    print(f"Error: {error}")

Connected to database: itt_master_contents


### List all tables in the database to verify the connection.

In [5]:
# List every table the inspector can see; any failure is reported, not raised.
try:
    inspector = inspect(engine)
    tables = inspector.get_table_names()
    if not tables:
        print(f"No tables found in the database '{db_name}'")
    else:
        print(f"Tables in the database '{db_name}':")
        for table in tables:
            print(table)
except Exception as error:
    print(f"Error: {error}")

Tables in the database 'itt_master_contents':
hotel_info_all
hotels_info_with_gidestination_code
innova_hotels_main
vervotech_ProviderFamily
vervotech_hotel_list
vervotech_hotel_map_new
vervotech_hotel_map_update
vervotech_mapping
vervotech_update_data_info


### Get the first rows of a table

In [14]:

def get_some_rows(table, engine, limit, output_path="HotelData.csv"):
    """Fetch the first ``limit`` rows of ``table`` and save them as a CSV file.

    Args:
        table: Name of the table to read. It is interpolated into the SQL
            text, so it must come from trusted code, never from user input.
        engine: Object whose ``connect()`` returns a context-managed
            connection that ``pandas.read_sql`` accepts.
        limit: Maximum number of rows to fetch; coerced with ``int()`` so
            that nothing but a number can reach the SQL text.
        output_path: CSV file to write. The default keeps the file name that
            was previously hard-coded.

    Returns:
        pandas.DataFrame: The fetched rows.

    Raises:
        SQLAlchemyError: Reported on stdout and then re-raised.
        TypeError, ValueError: If ``limit`` cannot be converted to ``int``.
    """
    row_limit = int(limit)
    query = f"SELECT * FROM {table} LIMIT {row_limit}"
    try:
        # The context manager closes the connection on success and on error.
        # No explicit rollback: this is a read-only SELECT, and inside the
        # handler the connection is already closed (or was never opened).
        with engine.connect() as connection:
            df = pd.read_sql(query, connection)
    except SQLAlchemyError as e:
        print(f"An error occurred: {e}")
        raise  # Re-raise so the failure stays visible to the caller

    # Write the file only after the connection has been released.
    df.to_csv(output_path, index=False)
    print(f"Data saved to {output_path} successfully.")
    return df

# Usage example
table = "vervotech_hotel_list"
# data = get_some_rows(table, engine, 100000)

Data saved to HotelData.csv successfully.


### Active supplier check.

In [9]:
def get_provider_family_active_list(table, engine):
    """Return the ``ProviderFamily`` value of every row whose status is 'active'.

    Args:
        table: Name of the table to query. It is interpolated into the SQL
            text, so it must come from trusted code.
        engine: Connection or engine accepted by ``pandas.read_sql``.

    Returns:
        list: The ``ProviderFamily`` column of the matching rows, in the
        order the database returned them.
    """
    query = f"SELECT ProviderFamily FROM {table} WHERE status = 'active'"
    df = pd.read_sql(query, engine)
    return df['ProviderFamily'].tolist()

# Call the helper defined in this cell; `get_provider_family_name` does not
# exist in this notebook and raised NameError on a fresh kernel.
get_provider_family_active_list('vervotech_ProviderFamily', engine)

['HotelBeds', 'TBO', 'EAN', 'MGHoliday', 'Agoda']

## Function: Get all rows from a table using the pandas `read_sql` function.

In [7]:
def total_data_count(table, engine):
    """Load every row of ``table`` into a DataFrame and return it.

    NOTE(review): despite its name this helper fetches rows, not a count, and
    a later cell defines another ``total_data_count`` that replaces this one
    once it has run. Consider renaming this function.

    Args:
        table: Name of the table to read. It is interpolated into the SQL
            text, so it must come from trusted code.
        engine: Connection or engine accepted by ``pandas.read_sql``.

    Returns:
        pandas.DataFrame: All rows of the table. Previously the frame was
        built and then discarded, so the call had no visible result.
    """
    query = f"SELECT * FROM {table}"
    df = pd.read_sql(query, engine)
    return df

# Run the helper against the hotel mapping table.
total_data_count("vervotech_hotel_map_new", engine)

          Id             last_update VervotechId UpdateDateFormat  \
0          1  2024/10/15 17:04:09 PM    70487877             None   
1          2  2024/10/15 17:04:09 PM    70477374             None   
2          3  2024/10/15 17:04:09 PM    70488525             None   
3          4  2024/10/15 17:04:09 PM    70488538             None   
4          5  2024/10/15 17:04:09 PM    70480802             None   
...      ...                     ...         ...              ...   
10269  10270  2024/11/04 01:00:09 AM    70491879             None   
10270  10271  2024/11/04 01:00:09 AM    70490902             None   
10271  10272  2024/11/04 01:00:09 AM    70492469             None   
10272  10273  2024/11/04 01:00:09 AM    70492842             None   
10273  10274  2024/11/04 01:00:09 AM    70490907             None   

      ProviderHotelId ProviderFamily ChannelIds ProviderLocationCode  \
0           100321969            EAN       None                 None   
1           100335387      

### Function: Total data count.

In [43]:
def total_data_count(table, engine):
    """Return the number of rows in ``table``.

    Runs ``SELECT COUNT(*)`` through ``pandas.read_sql`` and returns the
    single cell of the resulting frame.
    """
    count_frame = pd.read_sql(f"SELECT COUNT(*) FROM {table}", engine)
    return count_frame.iloc[0, 0]


# Row count of the hotel mapping table.
total_data_count("vervotech_hotel_map_new", engine)

np.int64(8674)

## Function: Get record counts per ProviderFamily for the latest creation date.

In [34]:
def new_group_data(table, engine):
    """Count rows per ``ProviderFamily`` for the most recent ``created_at`` date.

    Only rows whose ``DATE(created_at)`` equals the date of the newest
    ``created_at`` in ``table`` are counted.

    Returns:
        pandas.DataFrame: Columns ``ProviderFamily`` and ``value_count``.
    """
    latest_day_counts_sql = f"""
    SELECT ProviderFamily, COUNT(*) AS value_count
    FROM {table}
    WHERE DATE(created_at) = (
        SELECT DATE(MAX(created_at)) 
        FROM {table}
    )
    GROUP BY ProviderFamily;
    """
    return pd.read_sql(latest_day_counts_sql, engine)

# Per-provider counts for the latest load of the mapping table.
data = new_group_data("vervotech_hotel_map_new", engine)
print(data)


   ProviderFamily  value_count
0           Agoda            2
1            DOTW            2
2             EAN            6
3        GoGlobal            3
4      GRNConnect            3
5       HotelBeds           28
6       MGHoliday            1
7         Rakuten            3
8          Restel            1
9           Stuba           52
10            TBO            3


In [20]:
# Peek at the first group and at the number of groups.
print(data["ProviderFamily"][0])
print(data["value_count"][0])
print(len(data))

Agoda
2
11


In [24]:
# Kept for any cell that still reads it; the loop below no longer needs it.
leanth_table = len(data)

# Iterate over the column values directly: this does not depend on the frame
# having the default 0..n-1 index labels, unlike data.ProviderFamily[i].
for provider_family_name in data.ProviderFamily:
    print(provider_family_name)

Agoda
DOTW
EAN
GoGlobal
GRNConnect
HotelBeds
MGHoliday
Rakuten
Restel
Stuba
TBO


## Function: Get all rows for a specific ProviderFamily from the latest update.

In [35]:
def get_provider_family_data(table, engine, provider_family):
    """Return the latest-date rows of ``table`` for one provider family.

    A row is selected when its ``ProviderFamily`` equals ``provider_family``
    and its ``DATE(created_at)`` equals the date of the newest ``created_at``
    in the table.

    Args:
        table: Table name, interpolated into the SQL text (trusted input only).
        engine: Connection or engine accepted by ``pandas.read_sql``.
        provider_family: Value bound to the ``%s`` placeholder by the driver,
            so it is never spliced into the SQL text.

    Returns:
        pandas.DataFrame: The matching rows with all columns.
    """
    latest_rows_sql = f"""
    SELECT *
    FROM {table}
    WHERE ProviderFamily = %s
    AND DATE(created_at) = (
        SELECT DATE(MAX(created_at)) 
        FROM {table}
    );
    """
    bound_values = (provider_family,)
    return pd.read_sql(latest_rows_sql, engine, params=bound_values)

# Latest-update rows for the 'Agoda' provider family, printed for inspection.
agoda_data = get_provider_family_data("vervotech_hotel_map_new", engine, "Agoda")
print(agoda_data)


     Id             last_update VervotechId UpdateDateFormat ProviderHotelId  \
0  8571  2024/10/25 13:00:04 PM    70500662             None        13829532   
1  8572  2024/10/25 13:00:04 PM    39684057             None         2453037   

  ProviderFamily ChannelIds ProviderLocationCode                  status  \
0          Agoda       None                 None  Update data successful   
1          Agoda       None                 None  Update data successful   

           created_at          ModifiedOn  
0 2024-10-25 05:00:11 2024-10-25 07:00:12  
1 2024-10-25 05:00:11 2024-10-25 07:00:14  


### Select specific columns

In [28]:
import os
import json

df = get_provider_family_data("vervotech_hotel_map_new", engine, "Agoda")

# Keep only the columns needed downstream.
columns_to_include = ['Id', 'VervotechId', 'ProviderHotelId', 'ProviderFamily', 'status']
selected_df = df.loc[:, columns_to_include]
print(selected_df)


     Id VervotechId ProviderHotelId ProviderFamily                  status
0  8571    70500662        13829532          Agoda  Update data successful
1  8572    39684057         2453037          Agoda  Update data successful


### Get selected columns for every ProviderFamily

In [1]:
import os
import json

# Requires the cells defining new_group_data and get_provider_family_data to
# have been run first (the recorded NameError came from a fresh kernel).
data = new_group_data(table="vervotech_hotel_map_new", engine=engine)

# Loop-invariant: the same columns are kept for every provider family.
columns_to_include = ['Id', 'VervotechId', 'ProviderHotelId', 'ProviderFamily', 'status']

# Iterate over the column values directly instead of indexing by position.
for all_family_data in data.ProviderFamily:
    df = get_provider_family_data(
        table="vervotech_hotel_map_new",
        engine=engine,
        provider_family=all_family_data,
    )
    selected_df = df[columns_to_include]

    # One list of row records per VervotechId.
    grouped = selected_df.groupby('VervotechId')
    for vervotech_id, group in grouped:
        data_dict = group.to_dict(orient='records')
        print(data_dict)

NameError: name 'new_group_data' is not defined

### Convert the selected columns into dictionaries.

In [49]:
# Group the previously selected columns by hotel id.
grouped = selected_df.groupby("VervotechId")

for vervotech_id, group in grouped:
    # Each group becomes a list of row dictionaries (one dict per row).
    data_dict = group.to_dict(orient="records")
    print(data_dict)

[{'Id': 8318, 'VervotechId': '39368551', 'ProviderHotelId': '687153', 'ProviderFamily': 'Agoda', 'status': 'Update data successful'}]
[{'Id': 8317, 'VervotechId': '39683646', 'ProviderHotelId': '529347', 'ProviderFamily': 'Agoda', 'status': 'Update data successful'}]
[{'Id': 8316, 'VervotechId': '39783271', 'ProviderHotelId': '22309930', 'ProviderFamily': 'Agoda', 'status': 'Update data successful'}]
[{'Id': 8254, 'VervotechId': '39935372', 'ProviderHotelId': '1265152', 'ProviderFamily': 'Agoda', 'status': 'Skipping data'}, {'Id': 8315, 'VervotechId': '39935372', 'ProviderHotelId': '1265152', 'ProviderFamily': 'Agoda', 'status': 'Skipping data'}]


## Function: Save JSON files on the local machine.

In [51]:
import os
import json

def save_json_files_by_vervotechid(df, folder_path, columns=None):
    """Write one JSON file per ``VervotechId`` into ``folder_path``.

    Each file is named ``<VervotechId>.json`` and contains the list of row
    records of that group. A file that already exists is overwritten.

    Args:
        df: DataFrame holding at least the selected columns.
        folder_path: Destination directory; created with parents if missing.
        columns: Optional list of columns to keep. Defaults to ``Id``,
            ``VervotechId``, ``ProviderHotelId``, ``ProviderFamily`` and
            ``status``. It must include ``VervotechId`` because the rows are
            grouped by that column.

    Raises:
        KeyError: If a selected column is missing from ``df``.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder_path, exist_ok=True)

    if columns is None:
        columns = ['Id', 'VervotechId', 'ProviderHotelId', 'ProviderFamily', 'status']
    selected_df = df[columns]

    for vervotech_id, group in selected_df.groupby('VervotechId'):
        file_name = f"{vervotech_id}.json"
        file_path = os.path.join(folder_path, file_name)

        # Explicit encoding so the files do not depend on the platform default.
        with open(file_path, 'w', encoding='utf-8') as json_file:
            json.dump(group.to_dict(orient='records'), json_file, indent=4)
        print(f"Saved {file_name} in {folder_path}")

# Export the latest 'Agoda' rows, one JSON file per VervotechId.
folder_path = './vervotech_json_files'
agoda_data = get_provider_family_data("vervotech_hotel_map_new", engine, "Agoda")

save_json_files_by_vervotechid(agoda_data, folder_path)


Saved 39368551.json in ./vervotech_json_files
Saved 39683646.json in ./vervotech_json_files
Saved 39783271.json in ./vervotech_json_files
Saved 39935372.json in ./vervotech_json_files


In [1]:
import os
import json

data = new_group_data(table="vervotech_hotel_map_new", engine=engine)

# Loop-invariant: the same columns are kept for every provider family.
columns_to_include = ['Id', 'VervotechId', 'ProviderHotelId', 'ProviderFamily', 'status']

# Iterate over the column values directly instead of indexing by position.
for all_family_data in data.ProviderFamily:
    df = get_provider_family_data(
        table="vervotech_hotel_map_new",
        engine=engine,
        provider_family=all_family_data,
    )
    selected_df = df[columns_to_include]

    grouped = selected_df.groupby('VervotechId')
    for vervotech_id, group in grouped:
        # NOTE(review): the records are built but never printed or saved here,
        # so this cell produces no output of its own.
        data_dict = group.to_dict(orient='records')
        

hello world


## Grouping and Saving Data as JSON by Folder

#### Create Folder if Not Exists:
##### Use os.makedirs() to ensure a folder is created if it doesn’t exist for each ProviderFamily.

#### Filter Desired Columns:
##### Use columns_to_include to select specific columns (VervotechId, ProviderHotelId, etc.) before grouping.

#### Group Data by VervotechId:
##### Group the DataFrame by VervotechId to create a JSON file for each unique ID.

#### Save Each Group as JSON:
##### Convert each group to a dictionary and save it as a JSON file in the folder named after ProviderFamily.

#### Loop Through Each ProviderFamily:
##### Loop over each unique ProviderFamily from data, create a folder, and save all corresponding JSON files there.

In [None]:
import os
import json

def save_json_files_by_vervotechid(df, folder_path, columns=None):
    """Write one JSON file per ``VervotechId`` into ``folder_path``.

    Each file is named ``<VervotechId>.json`` and contains the list of row
    records of that group. A file that already exists is overwritten.

    Args:
        df: DataFrame holding at least the selected columns.
        folder_path: Destination directory; created with parents if missing.
        columns: Optional list of columns to keep. Defaults to
            ``VervotechId``, ``ProviderHotelId``, ``ProviderFamily`` and
            ``status``. It must include ``VervotechId`` because the rows are
            grouped by that column.

    Raises:
        KeyError: If a selected column is missing from ``df``.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder_path, exist_ok=True)

    if columns is None:
        columns = ['VervotechId', 'ProviderHotelId', 'ProviderFamily', 'status']
    selected_df = df[columns]

    for vervotech_id, group in selected_df.groupby('VervotechId'):
        file_name = f"{vervotech_id}.json"
        file_path = os.path.join(folder_path, file_name)

        # Explicit encoding so the files do not depend on the platform default.
        with open(file_path, 'w', encoding='utf-8') as json_file:
            json.dump(group.to_dict(orient='records'), json_file, indent=4)
        print(f"Saved {file_name} in {folder_path}")

# Main code: one sub-folder per ProviderFamily, one JSON file per VervotechId.
data = new_group_data(table="vervotech_hotel_map_new", engine=engine)

# Iterate over the column values directly: this does not depend on the frame
# having the default 0..n-1 index labels, unlike data.ProviderFamily[i].
for provider_family in data.ProviderFamily:
    # Latest-update rows of this provider family only.
    df = get_provider_family_data(
        table="vervotech_hotel_map_new",
        engine=engine,
        provider_family=provider_family,
    )

    # The provider family name is used as the sub-folder name.
    folder_path = f'./vervotech_json_files/{provider_family}'

    save_json_files_by_vervotechid(df, folder_path)


## Function: Return the latest update date and time

In [45]:
def new_data_latest_update_dataTime(table, engine):
    """Return the newest ``created_at`` value stored in ``table``.

    The value is returned exactly as ``pandas.read_sql`` delivers it (the
    single cell of a ``SELECT MAX(created_at)`` result).
    """
    latest_frame = pd.read_sql(
        f"SELECT MAX(created_at) AS last_update_time FROM {table};", engine
    )
    return latest_frame.iloc[0, 0]

# Newest created_at timestamp in the mapping table.
data = new_data_latest_update_dataTime("vervotech_hotel_map_new", engine)
print(data)


2024-10-25 17:00:10


## Function: Return only the latest update date

In [50]:
def new_date_latest_update_only_date(table, engine):
    """Return the date of the newest ``created_at`` in ``table`` as 'YYYY-MM-DD'.

    Args:
        table: Table name, interpolated into the SQL text (trusted input only).
        engine: Connection or engine accepted by ``pandas.read_sql``.

    Returns:
        str | None: The formatted date, or ``None`` when ``MAX(created_at)``
        is NULL (for example, when the table has no rows).
    """
    query = f"SELECT MAX(created_at) AS last_update_time FROM {table};"
    df = pd.read_sql(query, engine)
    latest_dateTime = df.iloc[0, 0]

    # MAX() over no rows yields NULL; calling strftime on it would fail.
    if pd.isna(latest_dateTime):
        return None

    # Normalise first: some drivers hand the value back as a plain string.
    latest_date = pd.to_datetime(latest_dateTime).strftime('%Y-%m-%d')
    return latest_date

# Date part of the newest created_at in the mapping table.
data = new_date_latest_update_only_date("vervotech_hotel_map_new", engine)
print(data)

2024-10-25
