# All 5 top Leagues

In [8]:
import logging
import os
import time
from bs4 import BeautifulSoup
import requests
import pandas as pd

# LOGGING
# Send every log record of this run to a single file that is appended to
# (never truncated), so the history of earlier runs is preserved.
log_folder = "C:/Users/User/Desktop/AI_Projects/Project_05/Log"
log_file = os.path.join(log_folder, "web_scraping.log")

# The folder has to exist before basicConfig opens the file handler.
os.makedirs(log_folder, exist_ok=True)

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    filename=log_file,
    filemode="a",
)

# First record of the run: marks where this scraping session starts.
logging.info("Log tizimi ishga tushdi — Web scraping boshlanmoqda.")


# PATH
# Per-league CSVs go to raw_path ("Web_scraped"); the merged file goes to
# merged_path ("Raw_data"). Both live under base_path.
base_path = "C:/Users/User/Desktop/AI_Projects/Project_05/Data"
raw_path, merged_path = (
    os.path.join(base_path, sub_folder)
    for sub_folder in ("Web_scraped", "Raw_data")
)

os.makedirs(raw_path, exist_ok=True)
os.makedirs(merged_path, exist_ok=True)

# LEAGUES URL
# Maps the league label used in file names and the "League" column to the
# league's overview page. Each entry is (label, URL slug, competition code).
leagues = {
    label: f"https://www.transfermarkt.com/{slug}/startseite/wettbewerb/{code}"
    for label, slug, code in (
        ("Premier_League", "premier-league", "GB1"),
        ("La_Liga", "laliga", "ES1"),
        ("Bundesliga", "bundesliga", "L1"),
        ("Serie_A", "serie-a", "IT1"),
        ("Ligue_1", "ligue-1", "FR1"),
    )
}

# Request headers sent with every page download.
headers = {"User-Agent": "Mozilla/5.0"}

# Values substituted into the saison_id query parameter: 2005 through 2025.
years = [*range(2005, 2026)]


# SCRAPING
# For every league and every value in `years`, download the season page,
# copy the text of each <td> in the rows of the table with class "items" into
# a DataFrame, and finally write one CSV per league into raw_path.
#
# Failures are isolated per season: a bad status code, a missing table or an
# exception is logged and that season is skipped; the run keeps going.

# Without a timeout requests.get can block indefinitely on a stalled
# connection, which would hang the whole run.
REQUEST_TIMEOUT = 30  # seconds
# Pause after every request (successful or not) to avoid flooding the site.
REQUEST_DELAY = 1  # seconds

# Column labels are assigned to the <td> cells by position.
# NOTE(review): confirm that the first <td> of each row really holds a rank
# and not, for example, an image/crest cell with empty text.
columns = ["Rank", "Club", "Squad size", "Average age", "Foreigners", "Market value", "Total market value"]

for league_name, base_url in leagues.items():
    print(f"\n {league_name} ligasi uchun ma'lumotlar yuklanmoqda...")
    logging.info(f"{league_name} ligasi uchun scraping boshlandi.")

    # One DataFrame per successfully scraped season of this league.
    league_dfs = []

    for year in years:
        url = f"{base_url}/plus/?saison_id={year}"
        print(f"  {year} yil ma'lumotlari olinmoqda...")
        logging.info(f"{league_name} | {year} yil uchun URL: {url}")

        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            if response.status_code != 200:
                print(f"  {year} yil uchun sahifa topilmadi ({response.status_code})")
                logging.warning(f"{league_name} | {year}: sahifa topilmadi (kod {response.status_code})")
                continue

            soup = BeautifulSoup(response.text, "html.parser")
            table = soup.find("table", {"class": "items"})
            if not table:
                print(f" ⚠️ Jadval topilmadi ({year})")
                logging.warning(f"{league_name} | {year}: jadval topilmadi.")
                continue

            # A table without an explicit <tbody> would otherwise fail with an
            # opaque AttributeError on None; search the table itself instead.
            # Rows that contain no <td> (e.g. header rows) are dropped below.
            body = table.find("tbody")
            if body is None:
                body = table

            rows = []
            for row in body.find_all("tr"):
                cols = [td.get_text(strip=True) for td in row.find_all("td")]
                if cols:
                    rows.append(cols)

            if not rows:
                logging.warning(f"{league_name} | {year}: jadval bo‘sh.")
                continue

            # Raises ValueError if the rows do not have exactly len(columns)
            # cells; that is caught below and the season is skipped.
            df = pd.DataFrame(rows, columns=columns)
            df["League"] = league_name
            df["Season"] = year
            league_dfs.append(df)

            logging.info(f"{league_name} | {year}: {len(rows)} ta qator olindi.")

        except Exception as e:
            # Per-season boundary: record the full traceback so the cause can
            # be diagnosed from the log, then carry on with the next season.
            logging.error(f"{league_name} | {year}: Xatolik — {e}", exc_info=True)
            print(f" ⚠️ Xatolik: {e}")
        finally:
            # Runs on every path, including the `continue` branches above, so
            # failed requests are rate-limited as well as successful ones.
            time.sleep(REQUEST_DELAY)

    # Save all scraped seasons of this league as a single CSV.
    if league_dfs:
        league_df = pd.concat(league_dfs, ignore_index=True)
        save_file = os.path.join(raw_path, f"{league_name}_20years.csv")
        league_df.to_csv(save_file, index=False)
        print(f"  {league_name} saqlandi: {save_file}")
        logging.info(f"{league_name} uchun CSV saqlandi: {save_file}")
    else:
        print(f"{league_name} uchun ma'lumot topilmadi!")
        logging.warning(f"{league_name}: ma'lumot topilmadi!")


# MERGE THE TOP 5 LEAGUES
# Concatenate every CSV found in raw_path into one file under merged_path.
# Note that this picks up ALL *.csv files in that folder, including files left
# over from earlier runs.
print("\n 5 liganing barcha CSV fayllari birlashtirilmoqda...")
logging.info("Barcha ligalarni birlashtirish jarayoni boshlandi.")

# os.listdir returns names in arbitrary order; sort so the row order of the
# merged file is reproducible between runs.
all_files = sorted(
    os.path.join(raw_path, f) for f in os.listdir(raw_path) if f.endswith(".csv")
)

if not all_files:
    # pd.concat raises ValueError on an empty list; report the situation
    # explicitly instead (e.g. when scraping failed for every league).
    print(" Birlashtirish uchun CSV fayl topilmadi!")
    logging.warning("Birlashtirish uchun CSV fayl topilmadi!")
else:
    all_data = [pd.read_csv(f) for f in all_files]

    final_df = pd.concat(all_data, ignore_index=True)
    final_path = os.path.join(merged_path, "Top5_Leagues_2005_2025.csv")

    final_df.to_csv(final_path, index=False)

    print(f"\n Barcha 5 liga birlashtirildi → {final_path}")
    print(f" Umumiy satrlar soni: {len(final_df)}")

    logging.info(f" Barcha 5 liga birlashtirildi. Umumiy satrlar: {len(final_df)}")
    logging.info(f"Yakuniy fayl saqlandi: {final_path}")
    logging.info(" Scraping muvaffaqiyatli yakunlandi.")



 Premier_League ligasi uchun ma'lumotlar yuklanmoqda...
  2005 yil ma'lumotlari olinmoqda...
  2006 yil ma'lumotlari olinmoqda...
  2007 yil ma'lumotlari olinmoqda...
  2008 yil ma'lumotlari olinmoqda...
  2009 yil ma'lumotlari olinmoqda...
  2010 yil ma'lumotlari olinmoqda...
  2011 yil ma'lumotlari olinmoqda...
  2012 yil ma'lumotlari olinmoqda...
  2013 yil ma'lumotlari olinmoqda...
  2014 yil ma'lumotlari olinmoqda...
  2015 yil ma'lumotlari olinmoqda...
  2016 yil ma'lumotlari olinmoqda...
  2017 yil ma'lumotlari olinmoqda...
  2018 yil ma'lumotlari olinmoqda...
  2019 yil ma'lumotlari olinmoqda...
  2020 yil ma'lumotlari olinmoqda...
  2021 yil ma'lumotlari olinmoqda...
  2022 yil ma'lumotlari olinmoqda...
  2023 yil ma'lumotlari olinmoqda...
  2024 yil ma'lumotlari olinmoqda...
  2025 yil ma'lumotlari olinmoqda...
  Premier_League saqlandi: C:/Users/User/Desktop/AI_Projects/Project_05/Data\Web_scraped\Premier_League_20years.csv

 La_Liga ligasi uchun ma'lumotlar yuklanmoqda...
