In [None]:
import openpyxl

In [None]:
class ExcelCRUD:
    def __init__(self, input_file, output_file="output.xlsx"):
        print("Initializing Excel CRUD operations...")
        self.input_file = input_file
        self.output_file = output_file
        self.load_workbook()

    def load_workbook(self):
        print(f"Loading workbook: {self.input_file}")
        self.wkb = openpyxl.load_workbook(self.input_file)
        self.data_sheet = self.wkb.active
        print("Workbook loaded successfully.")

    def create_new_workbook(self):
        print("Creating new workbook for output...")
        self.output_wb = openpyxl.Workbook()
        self.output_sheet = self.output_wb.active
        self.output_sheet.title = "data"
        print("New workbook created.")

    def read_data(self):
        print("Reading data from input file...")
        self.data = {}
        for col in self.data_sheet.iter_cols(values_only=True):
            if col[0]:
                self.data[col[0]] = col[1:]
        print("Data read successfully.")
        

    def write_data(self):
        print("Writing data to output file...")
        print(self.output_sheet)
        # if not self.output_sheet:
        #     self.create_new_workbook()
        # self.output_sheet.append(list(self.data.keys()))
        # for col_num, (header, values) in enumerate(self.data.items(), start=1):
        #     print(f"Writing column {col_num}: {header}")
        #     for row_num, value in enumerate(values, start=2):
        #         print(f"Writing row {row_num}, value: {value}")
        #         self.output_sheet.cell(row=row_num, column=col_num, value=value)
        # print("Data writing completed.")

    def save_output(self):
        print(f"Saving output file as {self.output_file}...")
        self.output_wb.save(self.output_file)
        print("Output file saved successfully.")

    def update_cell(self, sheet_name, row, column, new_value):
        print(f"Updating cell ({row}, {column}) in sheet '{sheet_name}'...")
        sheet = self.wkb[sheet_name]
        sheet.cell(row=row, column=column, value=new_value)
        self.wkb.save(self.input_file)
        print(f"Cell ({row}, {column}) updated to '{new_value}'.")

    def delete_row(self, sheet_name, row):
        print(f"Deleting row {row} from sheet '{sheet_name}'...")
        sheet = self.wkb[sheet_name]
        sheet.delete_rows(row)
        self.wkb.save(self.input_file)
        print(f"Row {row} deleted successfully.")

    def delete_column(self, sheet_name, column):
        print(f"Deleting column {column} from sheet '{sheet_name}'...")
        sheet = self.wkb[sheet_name]
        sheet.delete_cols(column)
        self.wkb.save(self.input_file)
        print(f"Column {column} deleted successfully.")

    def execute(self):
        self.create_new_workbook()
        self.read_data()
        self.write_data()
        self.save_output()

In [None]:
e = ExcelCRUD('./MOCK_DATA.xlsx')

In [None]:
e.read_data()

print(e.data)

In [None]:
print(e.output_file)

In [None]:
e.output_file = 'output_wb.xlsx'

In [None]:
e.read_data()

In [None]:
e.write_data()

In [1]:
import os
import pandas as pd
import threading
from concurrent.futures import ThreadPoolExecutor

class FileConverter:
    def __init__(self, input_file: str, output_file: str = None, output_format: str = "xlsx"):
        self.input_file = input_file
        self.output_format = output_format.lower()
        self.file_type = self._detect_file_type()
        self.data = None
        self.lock = threading.Lock()

        
        if output_file is None:
            base_name = os.path.splitext(os.path.basename(input_file))[0]
            self.output_file = f"{base_name}.{self.output_format}"
        else:
            self.output_file = output_file

    def _detect_file_type(self) -> str:
        ext = os.path.splitext(self.input_file)[1].lower()
        if ext in [".xlsx", ".xls", ".csv", ".json"]:
            return ext
        raise ValueError("Unsupported file format. Supported formats: .xlsx, .xls, .csv, .json")

    def read_data(self):
        print(f"Reading data from {self.input_file}...")
        if self.file_type in [".xlsx", ".xls"]:
            self.data = pd.read_excel(self.input_file, engine="openpyxl", dtype=str)
        elif self.file_type == ".csv":
            self.data = pd.read_csv(self.input_file, dtype=str)
        elif self.file_type == ".json":
            self.data = pd.read_json(self.input_file, dtype=str)

        if self.data is not None and not self.data.empty:
            print(f"Successfully read {len(self.data)} rows.")
        else:
            raise ValueError("No data found in the file.")

    def write_data(self):
        if self.data is None or self.data.empty:
            raise ValueError("No data available. Please call read_data() first.")

        print(f"Writing data to {self.output_file}...")
        with self.lock:  
            if self.output_format == "xlsx":
                chunk = 1000000
                
                with pd.ExcelWriter(self.output_file, engine="openpyxl") as writer:
                    for i in range(0, len(self.data), chunk):
                        self.data.iloc[i: i + chunk].to_excel(writer, sheet_name=f"Sheet {i // chunk + 1}", index=False)
            elif self.output_format == "csv":
                self.data.to_csv(self.output_file, index=False)
            elif self.output_format == "json":
                self.data.to_json(self.output_file, orient="records")

        print(f"Successfully written {len(self.data)} rows to {self.output_file}.")

    def execute(self):
        with ThreadPoolExecutor(max_workers=2) as executor:
            read_future = executor.submit(self.read_data)
            read_future.result()  
            
            write_future = executor.submit(self.write_data)
            write_future.result()  


converter = FileConverter("./MOCK_DATA.csv", output_format="xlsx")  
converter.execute()


Reading data from ./MOCK_DATA.csv...
Successfully read 1000 rows.
Writing data to MOCK_DATA.xlsx...
Successfully written 1000 rows to MOCK_DATA.xlsx.


In [None]:
from bs4 import BeautifulSoup
import json

html_file = "case_history.html"
with open(html_file, "r", encoding="utf-8") as file:
    soup = BeautifulSoup(file, "html.parser")

case_data = {
    "Page Title": soup.title.text.strip(),
    "Case Details": {},
    "Case Status": {},
    "Petitioners and Advocates": [],
    "Respondents and Advocates": [],
    "Acts and Sections": [],
    "Case Hearing History": [],
    "Orders": [],
    "Objections": []
}

case_table = soup.find("table", class_="case_details_table")
if case_table:
    rows = case_table.find_all("tr")
    for row in rows:
        cols = row.find_all("td")
        if len(cols) == 4:
            key1, value1, key2, value2 = [col.text.strip() for col in cols]
            case_data["Case Details"][key1] = value1
            case_data["Case Details"][key2] = value2

status_table = soup.find("table", class_="table_r")
if status_table:
    rows = status_table.find_all("tr")
    for row in rows:
        cols = row.find_all("td")
        if len(cols) == 2:
            key, value = [col.text.strip() for col in cols]
            case_data["Case Status"][key] = value

petitioner_table = soup.find("span", class_="Petitioner_Advocate_table")
if petitioner_table:
    petitioners = petitioner_table.text.strip().split("\n")
    case_data["Petitioners and Advocates"] = [p.strip() for p in petitioners if p.strip()]

respondent_table = soup.find("span", class_="Respondent_Advocate_table")
if respondent_table:
    respondents = respondent_table.text.strip().split("\n")
    case_data["Respondents and Advocates"] = [r.strip() for r in respondents if r.strip()]

acts_table = soup.find("table", class_="Acts_table")
if acts_table:
    rows = acts_table.find_all("tr")[1:]
    for row in rows:
        cols = row.find_all("td")
        if len(cols) == 2:
            act_data = {"Act": cols[0].text.strip(), "Section": cols[1].text.strip()}
            case_data["Acts and Sections"].append(act_data)

history_table = soup.find("table", class_="history_table")
if history_table:
    rows = history_table.find_all("tr")[1:]
    for row in rows:
        cols = row.find_all("td")
        if len(cols) == 5:
            hearing_data = {
                "Cause List Type": cols[0].text.strip(),
                "Judge": cols[1].text.strip(),
                "Business On Date": cols[2].text.strip(),
                "Hearing Date": cols[3].text.strip(),
                "Purpose of Hearing": cols[4].text.strip()
            }
            case_data["Case Hearing History"].append(hearing_data)

order_table = soup.find("table", class_="order_table")
if order_table:
    rows = order_table.find_all("tr")[1:]
    for row in rows:
        cols = row.find_all("td")
        if len(cols) >= 4:
            order_details = {
                "Order Number": cols[0].text.strip(),
                "Order On": cols[1].text.strip(),
                "Judge": cols[2].text.strip(),
                "Order Date": cols[3].text.strip(),
                "Order Details": cols[4].text.strip(),
            }
            case_data["Orders"].append(order_details)

obj_table = soup.find("table", class_="obj_table")
if obj_table:
    rows = obj_table.find_all("tr")[1:]
    for row in rows:
        cols = row.find_all("td")
        if len(cols) == 5:
            objection = {
                "Sr. No": cols[0].text.strip(),
                "Scrutiny Date": cols[1].text.strip(),
                "Objection": cols[2].text.strip(),
                "Compliance Date": cols[3].text.strip(),
                "Receipt Date": cols[4].text.strip(),
            }
            case_data["Objections"].append(objection)

print(json.dumps(case_data, indent=4, ensure_ascii=False))


In [None]:
import re

data = ['1)', 'MS', 'MILAN', 'FRUIT', 'SUPPLIERS', 'THROUGH', 'ITS', 'PROP', 'SHAIKH', 'MOIN', 'SHAIKH', 'SHAFIK', 'AND', 'OTHERS', 'SYED', 'AZIZODDIN', 'R', 'AND', 'MAJID', 'S', 'SHAIKH', '2)', 'SHAIKH', 'MOIN', 'SHAIKH', 'SHAFIK3)', 'SIIAHIDABI', 'SHAIK4)', 'AFSARABI', 'SHAIKH', 'SHAFIK']

result = []
current_str = ""

for item in data:
    
    match = re.match(r'(\d+\))', item)  
    
    if match:
        if current_str:
            result.append(current_str.strip())  
        current_str = match.group(1)  
        remaining_text = item[len(match.group(1)):].strip()  
        if remaining_text:
            current_str += " " + remaining_text  
    else:
        
        split_match = re.split(r'(\d+\))', item)
        if len(split_match) > 1:
            for part in split_match:
                part = part.strip()
                if re.match(r'^\d+\)$', part):  
                    if current_str:
                        result.append(current_str.strip())  
                    current_str = part  
                elif part:
                    current_str += " " + part  
        else:
            current_str += " " + item  

if current_str:
    result.append(current_str.strip())  

print(result)


['1) MS MILAN FRUIT SUPPLIERS THROUGH ITS PROP SHAIKH MOIN SHAIKH SHAFIK AND OTHERS SYED AZIZODDIN R AND MAJID S SHAIKH', '2) SHAIKH MOIN SHAIKH SHAFIK', '3) SIIAHIDABI SHAIK', '4) AFSARABI SHAIKH SHAFIK']


In [1]:
import requests, json
import pandas as pd
from pathlib import Path
url = 'https://my.api.mockaroo.com/users.json'


# def write_data_to_excel(data):
#     with pd.ExcelWriter(data, engine='openpyxl') as writer:
#         data.

res = requests.get(url, params={'key': "37c9a850"})

response = res.json()
print(response)

data = pd.json_normalize(response, max_level=1)
file_path = Path.cwd() / 'output.xlsx'

data.to_excel(file_path, index=False, engine='openpyxl')



[{'id': 1, 'first_name': 'Jere', 'last_name': 'Scattergood', 'email': 'jscattergood0@microsoft.com', 'gender': 'Male'}, {'id': 2, 'first_name': 'Lemuel', 'last_name': 'Nasey', 'email': 'lnasey1@omniture.com', 'gender': 'Male'}, {'id': 3, 'first_name': 'Mabel', 'last_name': 'Knok', 'email': 'mknok2@hexun.com', 'gender': 'Non-binary'}, {'id': 4, 'first_name': 'Lewes', 'last_name': 'Kennelly', 'email': 'lkennelly3@examiner.com', 'gender': 'Male'}, {'id': 5, 'first_name': 'Janean', 'last_name': 'Westerman', 'email': 'jwesterman4@godaddy.com', 'gender': 'Female'}, {'id': 6, 'first_name': 'Rodge', 'last_name': 'Drewson', 'email': 'rdrewson5@usda.gov', 'gender': 'Male'}, {'id': 7, 'first_name': 'Ada', 'last_name': 'Simone', 'email': 'asimone6@sina.com.cn', 'gender': 'Female'}, {'id': 8, 'first_name': 'Neill', 'last_name': 'Magauran', 'email': 'nmagauran7@japanpost.jp', 'gender': 'Male'}, {'id': 9, 'first_name': 'Avrit', 'last_name': 'Rounsefell', 'email': 'arounsefell8@symantec.com', 'gender'

In [22]:
csv_file_path = Path.cwd() / 'output.csv'

data.to_csv(csv_file_path, index=False)