### Install Required Libraries

In [None]:
%pip install pymongo pandas selenium

### IMPORT LIBRARIES

In [None]:
import os
import time

import pandas as pd
from pymongo import MongoClient
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


### DATA SCRAPING FUNCTION

In [None]:
def scrape_course_data(
    username="ID",
    password="Password",
    brave_path="C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe",
    output_csv="course_registration_status.csv",
    headless=False,
):
    """
    Scrape the registration status table from the university portal.

    Drives a Brave browser through Selenium: logs in, opens the
    "Registration Status Report" shortcut, reads the grid into a DataFrame
    and writes that DataFrame to ``output_csv``.

    Args:
        username: Value typed into the portal's username field.
        password: Value typed into the portal's password field.
        brave_path: Location of the Brave executable (adjust for your system).
        output_csv: Path of the CSV file the scraped table is written to.
        headless: When true, start the browser with ``--headless``.

    Returns:
        A pandas DataFrame with one row per data row of the grid, or ``None``
        if a wait timed out or any other error occurred (the error is
        printed, not raised).
    """
    # Brave is Chromium-based, so it is driven through the Chrome driver
    # with the binary location pointed at the Brave executable.
    brave_options = Options()
    brave_options.binary_location = brave_path
    brave_options.add_argument("--start-maximized")
    if headless:
        brave_options.add_argument("--headless")

    driver = webdriver.Chrome(options=brave_options)

    try:
        print("Navigating to login page...")
        driver.get("https://chreg.eng.cu.edu.eg/")

        print("Waiting for login form...")
        username_field = WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.ID, "txtUsername"))
        )

        print("Entering credentials...")
        password_field = driver.find_element(By.ID, "txtPassword")

        username_field.clear()
        username_field.send_keys(username)

        password_field.clear()
        password_field.send_keys(password)

        print("Logging in...")
        driver.find_element(By.ID, "ext-gen24").click()

        # Wait explicitly for the shortcut instead of sleeping a fixed time:
        # a fixed sleep fails on a slow login and wastes time on a fast one.
        print("Waiting for shortcuts to load...")
        registration_shortcut = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.ID, "mdl_win_17-shortcut"))
        )

        print("Clicking on Registration Status Report...")
        registration_shortcut.click()

        print("Waiting for registration data to load...")
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.ID, "cont_win_17_GridView1"))
        )

        # The table element can exist before its rows do, so wait until at
        # least one <tr> is present; the wait returns the (non-empty) rows.
        print("Waiting for data to load...")
        rows = WebDriverWait(driver, 20).until(
            lambda d: d.find_elements(By.CSS_SELECTOR, "#cont_win_17_GridView1 tr")
        )

        print("Extracting data...")
        header_row, *body_rows = rows
        headers = [th.text for th in header_row.find_elements(By.TAG_NAME, "th")]

        data = []
        for row in body_rows:
            cells = row.find_elements(By.TAG_NAME, "td")
            # Rows without <td> cells carry no data and would only add
            # empty records to the frame.
            if cells:
                data.append([cell.text for cell in cells])

        df = pd.DataFrame(data, columns=headers)

        # utf-8-sig writes a BOM at the start of the file.
        df.to_csv(output_csv, index=False, encoding='utf-8-sig')
        print(f"Data successfully scraped and saved to {output_csv}")

        return df

    except TimeoutException:
        print("Timeout: Elements not found within the expected time")
        return None
    except Exception as e:
        # Kept broad on purpose: callers rely on getting None back rather
        # than an exception when the scrape fails for any reason.
        print(f"An error occurred: {str(e)}")
        return None
    finally:
        # Always release the browser, whether the scrape succeeded or not.
        driver.quit()


### LOAD AND CLEAN DATA

In [None]:
# Load the table that the scraping step saved to disk.
df = pd.read_csv("course_registration_status.csv")

print("Cleaning data...")
# Build the cleaned frame in one step; `df` itself is left unmodified.
df2 = df.assign(
    Code=df["Code"].str.slice(2, -2),    # drop the first two and last two characters
    From=df["From"].str.slice(stop=-1),  # drop the last character
    To=df["To"].str.slice(stop=-3),      # drop the last three characters
    Type=df["Type"].str.slice(stop=-1),  # drop the last character
)

### Remove unnecessary columns

In [None]:
# Keep only the columns the later cells use; these five are discarded.
df2 = df2.drop(columns=['Class Size', 'Enrolled', 'Waiting', 'Status', 'Date'])

### Process time columns

In [None]:
# Parse the "From"/"To" strings as HH:MM timestamps into a new frame,
# leaving df2 unchanged.
df3 = df2.assign(
    From=pd.to_datetime(df2["From"], format='%H:%M'),
    To=pd.to_datetime(df2["To"], format='%H:%M'),
)

### Shift times whose hour is between 1 and 7 (inclusive) forward by 12 hours

In [None]:
# Add 12 hours to every timestamp whose hour lies in 1..7 (both ends
# inclusive); all other values are kept as they are.
df3["From"] = df3["From"].mask(
    df3["From"].dt.hour.between(1, 7),
    df3["From"] + pd.Timedelta(hours=12),
)
df3["To"] = df3["To"].mask(
    df3["To"].dt.hour.between(1, 7),
    df3["To"] + pd.Timedelta(hours=12),
)

# Render the timestamps back to "HH:MM" strings.
df3["From"] = df3["From"].dt.strftime("%H:%M")
df3["To"] = df3["To"].dt.strftime("%H:%M")

### Create course catalog

In [None]:
# One row per distinct (Code, Name) pair. drop_duplicates() is chained so it
# returns a new frame; calling it with inplace=True on a frame derived from
# df3 can trigger pandas' SettingWithCopyWarning.
courses = df3[['Code', 'Name']].drop_duplicates()

### DATABASE OPERATIONS

In [None]:
# The connection string contains the database username and password, so it
# is read from the environment instead of being stored in the notebook.
# NOTE(review): a connection string with a password was previously committed
# here; that password should be rotated.
mongo_uri = os.environ.get("MONGODB_URI")
if not mongo_uri:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB "
        "connection string before running this cell."
    )

client = MongoClient(mongo_uri)

db = client["Dry_Run"]
collection = db["Courses"]       # receives the per-session schedule rows
collection2 = db["Selections"]   # receives the (course_code, name) catalog rows

### Clear existing data

In [None]:
# Wipe both collections so the inserts below start from an empty state.
# An empty filter ({}) matches every document in the collection.
# WARNING: this is destructive and cannot be undone.
collection.delete_many({})
collection2.delete_many({})

### Insert course schedule data

In [None]:
# Build one document per schedule row, then send them in a single bulk
# insert instead of one network round trip per row.
schedule_docs = [
    {
        "day": row["Day"],
        "course_code": row["Code"].strip("_"),  # remove leading/trailing underscores
        "name": row["Name"].strip(),
        "g_number": int(row["Group"]),
        "type": row["Type"],
        "start_time": row["From"],
        "end_time": row["To"],
        "location": row["Location"],
    }
    for _, row in df3.iterrows()
]

# insert_many rejects an empty list, so skip the call when there is no data.
if schedule_docs:
    collection.insert_many(schedule_docs)

### Insert course catalog data

In [None]:
# Build one document per distinct course, then send them in a single bulk
# insert instead of one network round trip per row.
# NOTE(review): collection2 is bound to the "Selections" collection in the
# database setup cell; confirm that is the intended home for catalog data.
catalog_docs = [
    {
        "course_code": row["Code"].strip("_"),  # remove leading/trailing underscores
        "name": row["Name"].strip(),
    }
    for _, row in courses.iterrows()
]

# insert_many rejects an empty list, so skip the call when there is no data.
if catalog_docs:
    collection2.insert_many(catalog_docs)