In [1]:
####################################################################################
#
# Preprocessing_Migrationfiles.ipynb - script for pre-processing database migration files for the Joomla project.
# Copyright (C) 2023  Sravani Namburi
#
# Preprocessing_Migrationfiles.ipynb program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# 
# Preprocessing_Migrationfiles.ipynb program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along
# with Preprocessing_Migrationfiles.ipynb program; If not, see <https://www.gnu.org/licenses/>.
#
####################################################################################

In [2]:
import io
import os
import re
from pathlib import Path
import shutil

In [3]:
#!/usr/bin/env python

# Extract and transform the database schemas. If the output folder (Migrationfiles_Mod; called CT_Mod in an earlier note) already holds modified files, remove them before running this code.

def remove_sngl_comments(Text):
    """Strip ``--`` single-line SQL comments from ``Text``.

    Everything from the first ``--`` on a line up to (but not including) the
    end of that line is deleted. The result is then stripped of leading and
    trailing whitespace.

    Parameters
    ----------
    Text : str
        One line, or several newline-separated lines, of SQL.

    Returns
    -------
    str
        ``Text`` without ``--`` comments, whitespace-stripped at both ends.

    Notes
    -----
    The match is purely textual: a ``--`` inside a quoted SQL literal is
    treated as the start of a comment as well.
    """
    # Delete each comment where it occurs. Replacing the collected comment
    # strings one by one could cut a short comment out of a longer one and
    # leave the rest of the longer comment behind in the text.
    return re.sub(r"--.*", "", Text).strip()

def remove_mult_comments(Text):
    """Strip ``/* ... */`` block comments from ``Text``.

    Every complete ``/* ... */`` pair is removed and the text on either side
    of it is kept. If an opening ``/*`` has no matching ``*/`` in ``Text``,
    everything from that opener to the end of ``Text`` is dropped.

    Parameters
    ----------
    Text : str
        SQL text; it may be a single line or span several lines.

    Returns
    -------
    str
        ``Text`` without block comments. Surrounding whitespace is left
        untouched.

    Notes
    -----
    The match is purely textual: ``/*`` inside a quoted SQL literal is
    treated as a comment opener as well.
    """
    # Non-greedy so that two separate comments are not merged into one match;
    # DOTALL so that a comment may contain newlines.
    Text = re.sub(r"/\*.*?\*/", "", Text, flags=re.DOTALL)

    # Any opener still present is unterminated within this text.
    idx = Text.find("/*")
    if idx != -1:
        return Text[:idx]
    return Text

def mod_query(Query):
    """Flatten one SQL statement into a single, tidily spaced line.

    Each line of ``Query`` goes through a fixed list of literal
    substitutions (tabs to spaces, ``IF NOT EXISTS`` and the ``#__`` prefix
    removed, two known clauses rewritten). The lines are then joined, runs of
    whitespace are collapsed to single spaces, and the spacing around
    parentheses is tightened.

    Parameters
    ----------
    Query : str
        A single SQL statement, possibly spread over several lines.

    Returns
    -------
    str
        The rewritten statement on one line, without a trailing ``;``.
    """
    # Applied to every line, in exactly this order.
    Substitutions = (
        ("\t", " "),
        ("IF NOT EXISTS", ""),
        ("#__", ""),
        ("NOT NULL COMMENT '0=auto delete; 1=keep'", "NOT NULL COMMENT '0=auto delete, 1=keep'"),
        ("DEFAULT CHARSET=utf8mb4 DEFAULT COLLATE=utf8mb4_unicode_ci", "DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_unicode_ci"),
    )

    Cleaned_Lines = []
    for Raw_Line in Query.split("\n"):
        for Old, New in Substitutions:
            Raw_Line = Raw_Line.replace(Old, New)
        Cleaned_Lines.append(Raw_Line)

    # Splitting on any whitespace and re-joining collapses every run of
    # blanks (including leading and trailing ones) to a single space.
    Flat = " ".join(" ".join(Cleaned_Lines).split())

    # Drop a dangling comma before ")" and the padding inside parentheses.
    for Old, New in ((", )", ")"), ("( ", "("), (" )", ")")):
        Flat = Flat.replace(Old, New)
    return Flat


def _split_sql_statements(Text):
    """Split ``Text`` on ``;`` terminators that lie outside quoted literals.

    Text without any quote characters is split exactly like
    ``Text.split(";")``, including the trailing remainder after the last
    ``;``. Inside ``'...'``, ``"..."`` or backtick-quoted spans a ``;`` is
    kept as ordinary text.
    """
    Statements = []
    Current = []
    Quote = None      # quote character of the literal currently open, if any
    Escaped = False   # True when the previous character was an escaping backslash

    for Char in Text:
        if Quote is not None:
            Current.append(Char)
            if Escaped:
                Escaped = False
            elif Char == "\\" and Quote != "`":
                # Backslash escapes apply to string literals, not to
                # backtick-quoted identifiers.
                Escaped = True
            elif Char == Quote:
                # A doubled quote ('' or "") closes and immediately reopens
                # the literal on the next character, which is harmless here.
                Quote = None
        elif Char in ("'", '"', "`"):
            Quote = Char
            Current.append(Char)
        elif Char == ";":
            Statements.append("".join(Current))
            Current = []
        else:
            Current.append(Char)

    Statements.append("".join(Current))
    return Statements


def main():
    """Extract the CREATE TABLE statements from every migration file.

    Reads each ``*.sql`` file in ``<cwd>/Migrationfiles`` in name order,
    removes ``--`` and ``/*`` comments line by line, keeps the statements
    that contain ``create table`` (case-insensitive), normalises each with
    ``mod_query`` and writes them to
    ``<cwd>/Migrationfiles_Mod/<original name>_mod.sql``, each terminated
    by ``;`` and followed by a blank line.

    The output folder is created if it does not exist. An output file is
    written for every input file, even when no statement is kept.
    """
    cwd = os.getcwd()
    Migration_Files_Path = os.path.join(cwd, 'Migrationfiles')
    Output_Path = os.path.join(cwd, "Migrationfiles_Mod")

    # The with-block releases the directory handle as soon as listing is done.
    with os.scandir(Migration_Files_Path) as Files:
        Files_List = [
            {"name": File.name, "path": File.path}
            for File in Files
            if File.name.endswith(".sql")
        ]

    Files_dct_lst = sorted(Files_List, key=lambda k: k["name"])

    # Opening a file for writing does not create missing parent folders.
    os.makedirs(Output_Path, exist_ok=True)

    for File_dct in Files_dct_lst:
        print(File_dct)
        with io.open(File_dct["path"], mode="r", encoding="utf-8") as f:
            Text = f.read()

        # Comments are removed one line at a time before the text is split
        # into statements.
        Lines_mod = list()
        for Line in Text.split("\n"):
            Line_mod = remove_sngl_comments(Line)
            Line_mod = remove_mult_comments(Line_mod)
            Lines_mod.append(Line_mod)
        Text = "\n".join(Lines_mod)

        # Split quote-aware: a ';' inside a literal such as
        # COMMENT '0=auto delete; 1=keep' must not end the statement.
        Queries_list = [
            mod_query(Query)
            for Query in _split_sql_statements(Text)
            if "create table" in Query.lower()
        ]

        New_FileName = os.path.join(
            Output_Path,
            "{}_mod.sql".format(os.path.splitext(File_dct["name"])[0]),
        )
        with io.open(New_FileName, mode="w", encoding="utf-8") as Mod_File:
            for Query in Queries_list:
                Mod_File.write("{};\n\n".format(Query))

        # Reported once per processed file.
        print("Extraction and transformation of database schemas completed!")


# Run the preprocessing only when this code is executed directly (as it is in
# a notebook cell or as a script), not when it is imported as a module.
if __name__ == "__main__":
    main()

{'name': 'joomla_4.0.0-2018-02-24.sql', 'path': 'C:\\Users\\nambu\\Schema_Evolution\\MySQLDiff\\Database_Migration_Files\\Generate_Migration_Files\\Joomla\\Migrationfiles\\joomla_4.0.0-2018-02-24.sql'}
Extraction and transformation of database schemas completed!
{'name': 'joomla_4.0.0-2018-03-05.sql', 'path': 'C:\\Users\\nambu\\Schema_Evolution\\MySQLDiff\\Database_Migration_Files\\Generate_Migration_Files\\Joomla\\Migrationfiles\\joomla_4.0.0-2018-03-05.sql'}
Extraction and transformation of database schemas completed!
{'name': 'joomla_4.0.0-2018-05-15.sql', 'path': 'C:\\Users\\nambu\\Schema_Evolution\\MySQLDiff\\Database_Migration_Files\\Generate_Migration_Files\\Joomla\\Migrationfiles\\joomla_4.0.0-2018-05-15.sql'}
Extraction and transformation of database schemas completed!
{'name': 'joomla_4.0.0-2018-07-19.sql', 'path': 'C:\\Users\\nambu\\Schema_Evolution\\MySQLDiff\\Database_Migration_Files\\Generate_Migration_Files\\Joomla\\Migrationfiles\\joomla_4.0.0-2018-07-19.sql'}
Extractio

Extraction and transformation of database schemas completed!
{'name': 'joomla_4.1.0-2022-01-08.sql', 'path': 'C:\\Users\\nambu\\Schema_Evolution\\MySQLDiff\\Database_Migration_Files\\Generate_Migration_Files\\Joomla\\Migrationfiles\\joomla_4.1.0-2022-01-08.sql'}
Extraction and transformation of database schemas completed!
{'name': 'joomla_4.1.0-2022-01-19.sql', 'path': 'C:\\Users\\nambu\\Schema_Evolution\\MySQLDiff\\Database_Migration_Files\\Generate_Migration_Files\\Joomla\\Migrationfiles\\joomla_4.1.0-2022-01-19.sql'}
Extraction and transformation of database schemas completed!
{'name': 'joomla_4.1.0-2022-01-24.sql', 'path': 'C:\\Users\\nambu\\Schema_Evolution\\MySQLDiff\\Database_Migration_Files\\Generate_Migration_Files\\Joomla\\Migrationfiles\\joomla_4.1.0-2022-01-24.sql'}
Extraction and transformation of database schemas completed!
{'name': 'joomla_4.1.1-2022-02-20.sql', 'path': 'C:\\Users\\nambu\\Schema_Evolution\\MySQLDiff\\Database_Migration_Files\\Generate_Migration_Files\\Jo

### Copy processed files to the Docker_Mysqldiff folder.

In [4]:
# The destination is resolved relative to the working directory: it sits
# under the directory two levels above it.
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
parent_parent_dir = os.path.dirname(parent_dir)

source_dir = os.path.join(cwd, "Migrationfiles_Mod")
destination_dir = os.path.join(parent_parent_dir, "Docker_Mysqldiff", "Joomla", "Input")

# Validate the source before touching the destination. Otherwise a missing
# source folder would delete the previous copy and only then fail.
if not os.path.isdir(source_dir):
    raise FileNotFoundError("Processed migration files not found: {}".format(source_dir))

# copytree (with default arguments) refuses to copy into an existing
# directory, so the previous copy is removed first.
if os.path.exists(destination_dir):
    shutil.rmtree(destination_dir)

shutil.copytree(source_dir, destination_dir)

print("Migration files copied to Docker_Mysqldiff folder successfully.")

Migration files copied to Docker_Mysqldiff folder successfully.


### Copy processed files to the Datasets folder.

In [5]:
# The destination is resolved relative to the working directory: it sits
# under the directory four levels above it.
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
pp_dir = os.path.dirname(parent_dir)
ppp_dir = os.path.dirname(pp_dir)
pppp_dir = os.path.dirname(ppp_dir)

source_dir = os.path.join(cwd, "Migrationfiles_Mod")
destination_dir = os.path.join(pppp_dir, "Datasets", "Joomla", "Input", "Migration_Files")

# Validate the source before touching the destination. Otherwise a missing
# source folder would delete the previous copy and only then fail.
if not os.path.isdir(source_dir):
    raise FileNotFoundError("Processed migration files not found: {}".format(source_dir))

# copytree (with default arguments) refuses to copy into an existing
# directory, so the previous copy is removed first.
if os.path.exists(destination_dir):
    shutil.rmtree(destination_dir)

shutil.copytree(source_dir, destination_dir)

print("Migration files copied to Dataset folder successfully.")

Migration files copied to Dataset folder successfully.
