# Great Learning
is a leading global ed-tech company that offers a variety of online and hybrid learning programs in collaboration with top universities. The platform provides courses in various domains such as Data Science, Artificial Intelligence, Machine Learning, Business Analytics, Cybersecurity, Cloud Computing, Digital Marketing, and more. Great Learning aims to deliver high-quality education and professional training to help individuals upskill and advance their careers through flexible learning options like online, on-campus, and live virtual formats.

### Importing Libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Web Scraping

In [2]:
url = 'https://www.mygreatlearning.com/'

### using Headers 
it helps in mimicing a common web browser to make my request appears as if it is coming from a real user browsing the website , 
as without using it acessing will be Forbidden

In [3]:
headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'}

In [4]:
#viewing type of request to know if it 200 or forbiden
page = requests.get(url, headers=headers)
page

<Response [200]>

In [5]:
#get source code of the website
soup = BeautifulSoup(page.content, "html.parser")
soup

 <!DOCTYPE html>

<!--suppress HtmlRequiredTitleElement -->
<html class="no-js" lang="en">
<head>
<meta charset="utf-8"/>
<!--  <link rel="manifest" href="/manifest.json">-->
<meta content="#026adb" name="theme-color">
<title>Great Learning: Online Courses, PG Certificates and Degree Programs</title>
<meta content="Learn the in-demand skills to advance your career in AI, ML, data science, digital marketing, cloud, software engg &amp; more. Get degrees &amp; certificate programs from top universities." name="description"/>
<meta content="online education, online courses, online professional courses, great learning, my great learning, online tech courses, online learning, online higher education, e-learning, online pg programs, online pg course, learn online, great learning course, learn online, career courses, online pg programs, online certifications, online training and certifications, study online, online career" name="keywords"/>
<link href="https://www.mygreatlearning.com/" rel="ca

In [6]:
soup.find_all("div")

[<div class="main">
 <input id="logIn_status" type="hidden" value=""/>
 <input id="cip" type="hidden" value=""/>
 <input id="lead_stage" type="hidden" value=""/>
 <input id="is_embedded_webview" type="hidden" value=""/>
 <input data-asset-host="//d1vwxdpzbgdqj.cloudfront.net" data-is-us-consent="false" data-iseuropean="false" data-isrestricted="false" id="visitor-country" name="visitor-country" type="hidden" value="EG"/>
 <input data-freemium-program="false" data-intl="true" data-mobile-app-att="false" data-page-group="home_new_intl" data-pg-code="home_new_variant_intl" id="gl-page-settings" type="hidden">
 </style>
 <style nonce="c6f8041485142463667e431b" type="text/css">.hide{display:none !important}.main-menu-wrapper{display:block}.main-menu-wrapper .menu-group-name{padding:0;border:0;font-size:20px;line-height:28px;color:#000;font-weight:500;background-color:transparent}.main-menu-wrapper .menu-group-name::after,.main-menu-wrapper .menu-group-name::before{display:none}.nav-containe

In [7]:
soup.find_all('div',class_='quick-links-block')

[<div class="quick-links-block">
 <p class="quick-links__heading">Data Science and Business Analytics</p>
 <p class="quick-links__desc">
 <span class="nav-seperator">23 programs</span>
 </p>
 </div>,
 <div class="quick-links-block">
 <p class="quick-links__heading">AI &amp; Machine Learning</p>
 <p class="quick-links__desc">
 <span class="nav-seperator">20 programs</span>
 </p>
 </div>,
 <div class="quick-links-block">
 <p class="quick-links__heading">Software Development</p>
 <p class="quick-links__desc">
 <span class="nav-seperator">6 programs</span>
 </p>
 </div>,
 <div class="quick-links-block">
 <p class="quick-links__heading">Management</p>
 <p class="quick-links__desc">
 <span class="nav-seperator">5 programs</span>
 </p>
 </div>,
 <div class="quick-links-block">
 <p class="quick-links__heading">Cloud Computing</p>
 <p class="quick-links__desc">
 <span class="nav-seperator">5 programs</span>
 </p>
 </div>,
 <div class="quick-links-block">
 <p class="quick-links__heading">Design<

### Creating Programs Table

In [8]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# URL to scrape
url = "https://www.mygreatlearning.com/"

# Fetch the content of the webpage
response = requests.get(url)

# Initialize the list to hold program data
programs = []

# Check if the request was successful
if response.status_code == 200:
    print("Successfully fetched the webpage!")
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all program blocks within the 'quick-links__item' class
    quicklink_items = soup.find_all('li', class_='quick-links__item js-quicklink-item')

    for item in quicklink_items:
        # Extract program name
        program_name = item.find('p', class_='quick-links__heading').get_text(strip=True)
        
        # Filter specific program names
        if program_name in ["Study in USA", "Study in Germany", "FREE resources for study abroad", "GL eXcelerate", "Webinar on Demand", "All Study Abroad Programs"]:
            programs.append({'ID': len(programs) + 1, 'Name': program_name})

else:
    print(f"Failed to fetch the webpage. Status code: {response.status_code}")

# Convert the list to a pandas DataFrame
programs_df = pd.DataFrame(programs)

# Remove duplicates based on the 'Name' column
programs_df = programs_df.drop_duplicates(subset='Name').reset_index(drop=True)

# Display the DataFrame
programs_df


Successfully fetched the webpage!


Unnamed: 0,ID,Name
0,1,All Study Abroad Programs
1,2,Study in USA
2,3,Study in Germany
3,4,FREE resources for study abroad


### Creating Fields Table

In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL to scrape
url = "https://www.mygreatlearning.com/"

# Fetch the content of the webpage
response = requests.get(url)

# Initialize the list to hold category data
Fields = []

# List of program names to remove from Fields DataFrame
programs_to_remove = [
    "Study in USA", 
    "Study in Germany", 
    "FREE resources for study abroad", 
    "GL eXcelerate", 
    "Webinar on Demand", 
    "All Study Abroad Programs",
    "Study Abroad Programs",          # Additional programs to remove
    "Study in USA Programs", 
    "Study in Germany Programs"
]

# Check if the request was successful
if response.status_code == 200:
    print("Successfully fetched the webpage!")
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all category blocks
    categories = soup.find_all('div', class_='quick-links-block')

    for category in categories:
        # Extract category name
        category_name = category.find('p', class_='quick-links__heading').get_text(strip=True)
        
        # Extract number of programs
        desc = category.find('p', class_='quick-links__desc')
        
        # Check if 'desc' is not None before proceeding
        if desc is not None:
            # Extract number of programs
            program_count_span = desc.find('span', class_='nav-seperator')
            
            # Check if program_count_span is not None before proceeding
            if program_count_span is not None:
                program_count = program_count_span.get_text(strip=True)
                
                # Convert number of programs to integer if possible
                try:
                    number_of_programs = int(program_count.split()[0])  # Extracts the number (e.g., '23 programs' -> 23)
                except (ValueError, IndexError):
                    number_of_programs = None  # In case of missing or malformed data
            else:
                number_of_programs = None
        else:
            number_of_programs = None

        # Append the data as a dictionary to the list
        Fields.append({
            'Category Name': category_name,
            'Number of Programs': number_of_programs
        })

else:
    print(f"Failed to fetch the webpage. Status code: {response.status_code}")

# Convert the list to a pandas DataFrame
fields_df = pd.DataFrame(Fields)

# Remove rows with NaN in 'Number of Programs' column
fields_df.dropna(subset=['Number of Programs'], inplace=True)

# Remove duplicates based on the 'Category Name' column
fields_df.drop_duplicates(subset='Category Name', inplace=True)

# Remove rows where 'Category Name' is in programs_to_remove
fields_df = fields_df[~fields_df['Category Name'].isin(programs_to_remove)]

# Reset index after dropping rows to keep it sequential
fields_df.reset_index(drop=True, inplace=True)
fields_df.index += 1
fields_df.index.name = 'ID'

# Display the DataFrame
fields_df


Successfully fetched the webpage!


Unnamed: 0_level_0,Category Name,Number of Programs
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Data Science and Business Analytics,23.0
2,AI & Machine Learning,20.0
3,Software Development,6.0
4,Management,5.0
5,Cloud Computing,5.0
6,Design,4.0
7,Cyber Security,5.0
8,Digital Marketing,5.0
9,MBA Courses,2.0


### Creating Courses Table

In [11]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import random

# URL to scrape
url = "https://www.mygreatlearning.com/"

# Fetch the content of the webpage
response = requests.get(url)

# Initialize the list to hold course data
courses = []

# List of programs to establish a relationship with the programs table
programs_table = [
    {"ID": 1, "Name": "All Study Abroad Programs"},
    {"ID": 2, "Name": "Study in USA"},
    {"ID": 3, "Name": "Study in Germany"},
    {"ID": 4, "Name": "FREE resources for study abroad"}
]

# List of fields to establish a relationship with the field table
field_table = [
    {"ID": 1, "Category Name": "Data Science and Business Analytics", "Number of Programs": 23},
    {"ID": 2, "Category Name": "AI & Machine Learning", "Number of Programs": 20},
    {"ID": 3, "Category Name": "Software Development", "Number of Programs": 6},
    {"ID": 4, "Category Name": "Management", "Number of Programs": 5},
    {"ID": 5, "Category Name": "Cloud Computing", "Number of Programs": 5},
    {"ID": 6, "Category Name": "Design", "Number of Programs": 4},
    {"ID": 7, "Category Name": "Cyber Security", "Number of Programs": 5},
    {"ID": 8, "Category Name": "Digital Marketing", "Number of Programs": 5},
    {"ID": 9, "Category Name": "MBA Courses", "Number of Programs": 2}
]

# Check if the request was successful
if response.status_code == 200:
    print("Successfully fetched the webpage!")
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all program blocks within the 'program-card__unit' class
    program_cards = soup.find_all('li', class_='program-card__unit')

    for idx, card in enumerate(program_cards, start=1):
        # Extract the program name
        program_name = card.find('h3', class_='program-detail__name').get_text(strip=True)

        # Extract the institution
        institution = card.find('p', class_='program-detail__partner').get_text(strip=True)

        # Extract the duration and mode of delivery
        term = card.find('p', class_='program-detail__term').get_text(strip=True).replace("\n", " ").strip()

        # Split duration and mode
        duration_match = re.search(r'(\d+)\s*(month|week|year|day|months|weeks|years|days)', term, re.IGNORECASE)
        if duration_match:
            duration_number = int(duration_match.group(1))
            duration_unit = duration_match.group(2).lower()
        else:
            duration_number = None
            duration_unit = None
        
        # Determine Mode of Delivery
        if "Online" in term:
            mode_of_delivery = "Online"
        elif "On-campus" in term:
            mode_of_delivery = "On-campus"
        elif "Live Virtual" in term:
            mode_of_delivery = "Live Virtual"
        else:
            mode_of_delivery = None

        # Determine Program ID from programs_table
        if mode_of_delivery in ["Online", "Live Virtual"]:
            # Randomly assign program ID 1 or 4 if the mode is 'Online' or 'Live Virtual'
            program_id = random.choice([1, 4])
        else:
            # Assign based on the 'term' matching any program name
            program_id = None
            for program in programs_table:
                if program['Name'] in term:
                    program_id = program['ID']
                    break

        # Ensure mode_of_delivery is set to "Online" for specific program IDs
        if program_id in [1, 4] and mode_of_delivery is None:
            mode_of_delivery = "Online"

        # Determine Field ID from field_table based on program name
        if program_name in [
            "PG Program in Artificial Intelligence and Machine Learning", 
            "MS Artificial Intelligence and Machine Learning",
            "MIT Data Science and Machine Learning Program"
        ]:
            field_id = 2
        elif program_name == "Doctor Of Business Administration in Artificial Intelligence":
            field_id = 4
        elif program_name in [
            "Master of Data Science (Global) Program", 
            "MS in Data Science Programme", 
            "MS Data Analytics"
        ]:
            field_id = 1
        else:
            # Determine field_id based on general categorization
            if "Data Science" in program_name or "Business Analytics" in program_name:
                field_id = 1
            elif "AI" in program_name or "Machine Learning" in program_name:
                field_id = 2
            elif "Management" in program_name:
                field_id = 4
            else:
                field_id = random.randint(1, 9)  # Randomly assign if no specific match found

        # Randomly generate a realistic price for the course
        if duration_unit == "week":
            price = random.randint(200, 1000) * duration_number
        elif duration_unit == "month":
            price = random.randint(1000, 5000) * duration_number
        elif duration_unit == "year":
            price = random.randint(5000, 20000) * duration_number
        else:
            price = random.randint(200, 1000)  # Default price for unspecified duration

        # Randomly generate a realistic number of enrollments
        if duration_unit in ["week", "weeks"]:
            enrollments = random.randint(200, 1000)
        elif duration_unit in ["month", "months"]:
            enrollments = random.randint(100, 500)
        elif duration_unit in ["year", "years"]:
            enrollments = random.randint(20, 100)
        else:
            enrollments = random.randint(50, 200)  # Default enrollments for unspecified duration

        # Calculate total profit
        total_profit = price * enrollments

        # Extract badge/Highlights if available
        badge = card.find('div', class_='program-detail__bagde')
        badge_text = badge.get_text(strip=True) if badge else None

        # Append the data as a dictionary to the list
        courses.append({
            'ID': idx,
            'Institution': institution,
            'Program Name': program_name,
            'Duration Number': duration_number,
            'Duration Unit': duration_unit,
            'Mode of Delivery': mode_of_delivery,
            'Program ID': program_id,
            'Field ID': field_id,
            'Badge': badge_text,
            'Price': price,
            'Number of Enrollments': enrollments,
            'Total Profit': total_profit
        })

else:
    print(f"Failed to fetch the webpage. Status code: {response.status_code}")

# Convert the list to a pandas DataFrame
courses_df = pd.DataFrame(courses)

# Display the DataFrame
courses_df


Successfully fetched the webpage!


Unnamed: 0,ID,Institution,Program Name,Duration Number,Duration Unit,Mode of Delivery,Program ID,Field ID,Badge,Price,Number of Enrollments,Total Profit
0,1,AI & Machine Learning,PG Program in Artificial Intelligence and Mach...,7,month,Online,1,2,,15652,196,3067792
1,2,AI & Machine Learning,No Code AI and Machine Learning: Building Data...,12,week,Online,4,1,Learn from MIT Faculty,5760,989,5696640
2,3,AI & Machine Learning,Applied Data Science Program,12,week,Live Virtual,1,1,,4584,259,1187256
3,4,AI & Machine Learning,MIT Data Science and Machine Learning Program,12,week,Online,4,2,Learn from MIT Faculty,8904,391,3481464
4,5,Data Science & Business Analytics,PG Program in Data Science and Business Analytics,6,month,Online,1,1,,8214,479,3934506
...,...,...,...,...,...,...,...,...,...,...,...,...
80,81,Walsh College,MS Data Analytics,2,year,,2,1,95% Placement Rate,28740,87,2500380
81,82,Walsh College,MS Artificial Intelligence and Machine Learning,2,year,,2,2,95% Placement Rate,16092,95,1528740
82,83,Northwestern University,MS in Data Science Programme,18,month,Online,1,1,Live Sessions,60156,420,25265520
83,84,Deakin University,Master of Data Science (Global) Program,24,month,Online,4,1,,29496,234,6902064


#  PreProcessing

### Programs Table

In [12]:
programs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ID      4 non-null      int64 
 1   Name    4 non-null      object
dtypes: int64(1), object(1)
memory usage: 192.0+ bytes


In [13]:
programs_df.isnull().sum().sum()

0

In [14]:
programs_df.duplicated().sum()

0

### Fields Table

In [15]:
fields_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 1 to 9
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Category Name       9 non-null      object 
 1   Number of Programs  9 non-null      float64
dtypes: float64(1), object(1)
memory usage: 276.0+ bytes


In [16]:
fields_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Number of Programs,9.0,8.333333,7.582875,2.0,5.0,5.0,6.0,23.0


In [17]:
fields_df.isnull().sum().sum()

0

In [18]:
fields_df.duplicated().sum()

0

### Courses Table 

In [19]:
courses_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85 entries, 0 to 84
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   ID                     85 non-null     int64 
 1   Institution            85 non-null     object
 2   Program Name           85 non-null     object
 3   Duration Number        85 non-null     int64 
 4   Duration Unit          85 non-null     object
 5   Mode of Delivery       56 non-null     object
 6   Program ID             85 non-null     int64 
 7   Field ID               85 non-null     int64 
 8   Badge                  49 non-null     object
 9   Price                  85 non-null     int64 
 10  Number of Enrollments  85 non-null     int64 
 11  Total Profit           85 non-null     int64 
dtypes: int64(7), object(5)
memory usage: 8.1+ KB


In [20]:
courses_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ID,85.0,43.0,24.6813,1.0,22.0,43.0,64.0,85.0
Duration Number,85.0,12.0,6.998299,2.0,6.0,12.0,20.0,28.0
Program ID,85.0,2.588235,1.256356,1.0,1.0,2.0,4.0,4.0
Field ID,85.0,2.976471,2.478366,1.0,1.0,2.0,4.0,9.0
Price,85.0,30648.21,28318.42,2688.0,8214.0,19894.0,54220.0,112704.0
Number of Enrollments,85.0,365.3765,228.9702,34.0,211.0,315.0,446.0,989.0
Total Profit,85.0,9530289.0,10247440.0,803712.0,2722500.0,5045238.0,11390388.0,43404768.0


In [21]:
courses_df.describe(include='object').T

Unnamed: 0,count,unique,top,freq
Institution,85,18,Clark University,14
Program Name,85,36,MS in Computer Science,6
Duration Unit,85,3,month,51
Mode of Delivery,56,2,Online,52
Badge,49,14,94% Placement Rate,14


In [22]:
courses_df.duplicated().sum()

0

In [23]:
courses_df.isnull().sum()

ID                        0
Institution               0
Program Name              0
Duration Number           0
Duration Unit             0
Mode of Delivery         29
Program ID                0
Field ID                  0
Badge                    36
Price                     0
Number of Enrollments     0
Total Profit              0
dtype: int64

In [24]:
existing_modes = courses_df['Mode of Delivery'].dropna().unique()
courses_df['Mode of Delivery'] = courses_df['Mode of Delivery'].fillna(pd.Series(np.random.choice(existing_modes, size=len(courses_df))))


In [25]:
# to view rows of nulls 
null_badge_rows = courses_df[courses_df['Badge'].isnull()]
null_badge_rows

Unnamed: 0,ID,Institution,Program Name,Duration Number,Duration Unit,Mode of Delivery,Program ID,Field ID,Badge,Price,Number of Enrollments,Total Profit
0,1,AI & Machine Learning,PG Program in Artificial Intelligence and Mach...,7,month,Online,1,2,,15652,196,3067792
2,3,AI & Machine Learning,Applied Data Science Program,12,week,Live Virtual,1,1,,4584,259,1187256
4,5,Data Science & Business Analytics,PG Program in Data Science and Business Analytics,6,month,Online,1,1,,8214,479,3934506
6,7,University of Texas - McCombs,PG Program in Artificial Intelligence and Mach...,7,month,Online,4,2,,21826,179,3906854
8,9,MIT Professional Education,Applied Data Science Program,12,week,Live Virtual,1,1,,9852,253,2492556
10,11,University of Texas - McCombs,PG Program in Data Science and Business Analytics,6,month,Online,4,1,,7500,422,3165000
12,13,Wharton Online,Advanced Digital Marketing and Growth Strategies,12,week,Online,4,1,,5748,540,3103920
13,14,utaustin,Pg Program in Full Stack Software Development,6,month,Online,4,3,,15468,267,4129956
14,15,utaustin,Pg Program in User Experience design (UX Design),6,month,Online,1,2,,25494,270,6883380
15,16,Wharton Online,Advanced Digital Marketing and Growth Strategies,12,week,Online,1,9,,5472,644,3523968


In [26]:
#check if there is Institution equal to it have value in badge
badge_value = courses_df[courses_df['Institution'] == 'Microsoft']['Badge']
print(badge_value)

27                None
41    with PL-300 Prep
42                None
57                None
58                None
Name: Badge, dtype: object


In [27]:
courses_df.loc[(courses_df['Institution'] == 'AI & Machine Learning') & (courses_df['Badge'].isnull()), 'Badge'] = 'Learn from MIT Faculty'
courses_df.loc[(courses_df['Institution'] == 'MIT Professional Education') & (courses_df['Badge'].isnull()), 'Badge'] = 'Learn from MIT Faculty'
courses_df.loc[(courses_df['Institution'] == 'Microsoft') & (courses_df['Badge'].isnull()), 'Badge'] = 'with PL-300 Prep'

In [28]:
values = ['No Programming Exp Required', '360° Cloud Learning']

# Apply the random choice to the 'Badge' column where conditions are met
mask = (courses_df['Institution'] == 'University of Texas - McCombs') & (courses_df['Badge'].isnull())
courses_df.loc[mask, 'Badge'] = np.random.choice(values, size=mask.sum())

values2 = ['Dedicated Career Support', 'Certificate from Great Lakes']

# Apply the random choice to the 'Badge' column where the second condition is met
mask2 = (courses_df['Institution'] == 'Great Lakes Executive Learning') & (courses_df['Badge'].isnull())
courses_df.loc[mask2, 'Badge'] = np.random.choice(values2, size=mask2.sum())

In [29]:
courses_df.isnull().sum()

ID                        0
Institution               0
Program Name              0
Duration Number           0
Duration Unit             0
Mode of Delivery          0
Program ID                0
Field ID                  0
Badge                    17
Price                     0
Number of Enrollments     0
Total Profit              0
dtype: int64

In [30]:
courses_df['Badge'].unique()

array(['Learn from MIT Faculty', None, '360° Cloud Learning',
       'No Programming Exp Required', '94% Placement Rate',
       '95% Placement Rate', 'Upto 3 years STEM OPT Visa',
       'with PL-300 Prep', 'Live Sessions', 'No Programming Experience',
       '90%+ Placement rate', 'Dedicated Career Support',
       'Upto 18 months Job Seeker VISA', 'No GRE/GMAT required',
       'Certificate from Great Lakes'], dtype=object)

In [31]:
#get top 5 values then replace null by them
top_values = courses_df['Badge'].dropna().value_counts().head(5).index.tolist()
mask = courses_df['Badge'].isnull()
courses_df.loc[mask, 'Badge'] = np.random.choice(top_values, size=mask.sum())

In [32]:
courses_df.isnull().sum().sum()

0

## Downloading csv files

In [34]:
import pandas as pd
import os
# Save DataFrames to CSV 
programs_df.to_csv('Programs.csv', index=False)
fields_df.to_csv('Fields.csv', index=False)
courses_df.to_csv('Courses.csv', index=False)
# Print the current working directory to ensure the files are saved there
print("Current Working Directory:", os.getcwd())
# List all files in the directory to check if they were saved
print("Files in Directory:", os.listdir(os.getcwd()))

Current Working Directory: C:\Users\HP\NTI
Files in Directory: ['.ipynb_checkpoints', 'airline.csv', 'airline.ipynb', 'airline.xlsx', 'airlinepassen.csv', 'airlinepassen.xlsx', 'airline_passenger_satisfaction.csv', 'all_perth_310121.csv', 'amazon.csv', 'Amazon.ipynb', 'anime.csv', 'Anime.ipynb', 'cargame.csv', 'categories.csv', 'Courses.csv', 'EtsyProducts.csv', 'Example1.csv', 'Fields.csv', 'Final Project.ipynb', 'first_pro.csv', 'first_pro.xlsx', 'first_project.ipynb', 'house_price.csv', 'house_price.xlsx', 'NTI 3.ipynb', 'NTI1.ipynb', 'NTI2.ipynb', 'Popular_Language.csv', 'Programs.csv', 'Project.ipynb', 'rating.csv', 'Supported_colors.csv', 'test.ipynb', 'Untitled1.ipynb', 'Untitled2.ipynb', 'Web Scrapping.ipynb']


In [35]:
from IPython.display import FileLink
# Provide a link to download the CSV file
display(FileLink('Programs.csv'))
display(FileLink('Fields.csv'))
display(FileLink('Courses.csv'))

In [36]:
import sqlite3
import pandas as pd

# Create a connection to SQLite database (or create it if it doesn't exist)
conn = sqlite3.connect('courses_database.db')

# Save DataFrames to SQL tables
programs_df.to_sql('Programs', conn, if_exists='replace', index=False)
fields_df.to_sql('Fields', conn, if_exists='replace', index=False)
courses_df.to_sql('Courses', conn, if_exists='replace', index=False)

# Close the database connection
conn.close()

print("DataFrames have been successfully saved to SQL tables.")


DataFrames have been successfully saved to SQL tables.


  sql.to_sql(


In [37]:
display(FileLink('courses_database.db'))