# github_trending.ipynb

### Objective:
Scrape the trending repositories from the GitHub Trending page
- Extract repo name, description, stars, and language
- Practice step-by-step scraping with BeautifulSoup


In [None]:
# Step 1: Import libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
# Step 2: Define URL and get page content
url = 'https://github.com/trending'
# Send an explicit User-Agent header with the request.
headers = {'User-Agent': 'Mozilla/5.0'}
# requests applies no timeout by default, so set one to keep the cell from
# hanging indefinitely if the server never responds.
response = requests.get(url, headers=headers, timeout=10)
# Raise on 4xx/5xx responses instead of parsing an error page as if it were
# the trending list.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [None]:
# Step 3: Find one repo block to understand structure
repo = soup.find('article', class_='Box-row')
# find() returns None when nothing matches; stop with a clear message rather
# than an AttributeError on None in this or a later cell.
if repo is None:
    raise RuntimeError(
        "No <article class='Box-row'> element found; "
        "the page structure may have changed or the request was blocked."
    )
# Show only the first 1000 characters of the formatted markup.
print(repo.prettify()[:1000])

In [None]:
# Step 4: Extract repo name
# Use find() so a missing heading yields None instead of raising
# AttributeError on a chained `.h1.a` access. Both h1 and h2 are accepted
# because the heading level wrapping the link may differ between page versions.
heading = repo.find(['h1', 'h2'])
repo_name_tag = heading.find('a') if heading else None
# Drop all whitespace (newlines and spaces inside the link text) so the name
# is a single compact token; fall back to 'N/A' when no link is found.
repo_name = ''.join(repo_name_tag.text.split()) if repo_name_tag else 'N/A'
print(f"Repository Name: {repo_name}")

In [None]:
# Step 5: Extract repo description
# Look for the description paragraph inside the repo block; find() gives
# None when the paragraph is absent.
desc_tag = repo.find('p', class_='col-9 color-fg-muted my-1 pr-4')
if desc_tag is None:
    description = 'No description'
else:
    # Trim the surrounding whitespace from the paragraph text.
    description = desc_tag.get_text().strip()
print(f"Description: {description}")

In [None]:
# Step 6: Extract programming language
# The language is read from the <span> carrying itemprop="programmingLanguage".
lang_tag = repo.find('span', attrs={'itemprop': 'programmingLanguage'})
if lang_tag is None:
    # No such span in this repo block.
    language = 'N/A'
else:
    language = lang_tag.get_text().strip()
print(f"Language: {language}")

In [None]:
# Step 7: Extract stars count
# Match the first link whose href ends with '/stargazers'. The predicate
# checks that href is truthy before calling endswith() on it.
stars_tag = repo.find(
    'a',
    href=lambda href: bool(href) and href.endswith('/stargazers'),
)
if stars_tag is None:
    stars = '0'
else:
    # Kept as the displayed text (a string), not converted to a number.
    stars = stars_tag.get_text().strip()
print(f"Stars: {stars}")

In [None]:
# Step 8: Loop through all repos
# One list per output column; each iteration appends exactly one value to
# every list so they stay the same length.
repo_names = []
descriptions = []
languages = []
stars_list = []

all_repos = soup.find_all('article', class_='Box-row')

# Use a loop variable other than `repo` so the single block inspected in
# Step 3 is not overwritten when this cell runs.
for repo_block in all_repos:
    # Guard every lookup: find() returns None for a missing tag, whereas a
    # chained `.h1.a` access raises AttributeError before the 'N/A' fallback
    # can apply. Both h1 and h2 are accepted because the heading level
    # wrapping the link may differ between page versions.
    heading = repo_block.find(['h1', 'h2'])
    name_tag = heading.find('a') if heading else None
    # Drop all whitespace inside the link text to get a compact name.
    name = ''.join(name_tag.text.split()) if name_tag else 'N/A'

    desc_tag = repo_block.find('p', class_='col-9 color-fg-muted my-1 pr-4')
    desc = desc_tag.text.strip() if desc_tag else 'No description'

    lang_tag = repo_block.find('span', itemprop='programmingLanguage')
    lang = lang_tag.text.strip() if lang_tag else 'N/A'

    # First link whose href ends with '/stargazers'; the text is kept as a
    # string exactly as displayed.
    stars_tag = repo_block.find('a', href=lambda x: x and x.endswith('/stargazers'))
    stars = stars_tag.text.strip() if stars_tag else '0'

    repo_names.append(name)
    descriptions.append(desc)
    languages.append(lang)
    stars_list.append(stars)

In [None]:
# Step 9: Create DataFrame
# Assemble the four result lists into a table, one column per field.
df = pd.DataFrame(
    data=dict(
        Repository=repo_names,
        Description=descriptions,
        Language=languages,
        Stars=stars_list,
    )
)

# Preview the first rows (shown as the cell output in a notebook).
df.head()

In [None]:
# Step 10: Save to CSV
# index=False leaves the DataFrame's row index out of the written file.
df.to_csv(path_or_buf='github_trending.csv', index=False)
print('Saved to github_trending.csv')

### Summary:
- Scraped trending repos from github.com/trending
- Extracted repo name, description, language, stars
- Saved results to CSV
