In [15]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def setup_driver():
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=service, options=options)

def get_season_links(driver, team_url):
    print("Fetching season links...")
    driver.get(team_url)
    
    # Handle GDPR using more reliable detection
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//button[contains(., 'AGREE')]"))
        ).click()
        print("GDPR consent accepted")
        time.sleep(1)
    except Exception as e:
        print("No GDPR popup:", e)
    
    # Wait for main content to load using more stable identifier
    WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div#meta h1")))
    
    # Find and expand season selector
    try:
        dropdown_button = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "div#meta button.dropdown-toggle")))
        dropdown_button.click()
        print("Season dropdown expanded")
        time.sleep(1)
    except Exception as e:
        print("Failed to expand dropdown:", e)
        return []
    
    # Extract season links with improved selector
    seasons = driver.find_elements(By.CSS_SELECTOR, "div#meta div.dropdown-menu a[href*='/en/comps/']")
    season_links = [link.get_attribute('href') for link in seasons]
    
    print(f"Found {len(season_links)} seasons")
    return season_links

# Test execution
driver = setup_driver()
try:
    team_url = "https://fbref.com/en/squads/206d90db/Barcelona-Stats"
    seasons = get_season_links(driver, team_url)
    print("First 5 seasons:", seasons[:5])
    time.sleep(10)  # Keep browser open for inspection
finally:
    driver.quit()

Fetching season links...


KeyboardInterrupt: 

In [10]:
seasons

[]

In [1]:
import numpy as np
import pandas as pd

In [2]:
np.array([1, 2, 3]).shape

(3,)

In [3]:
dictionary = {"asd": ["123"]}  # Wrap scalar value in a list
print(type(dictionary))
df1 = pd.DataFrame(dictionary)
df1

<class 'dict'>


Unnamed: 0,asd
0,123


In [None]:
with open("movies_threaded.csv", "r") as file:
    file1 = file.read()
from io import StringIO

# Convert the string content of the file into a DataFrame
df_file = pd.read_csv(StringIO(file1))
df_file

Unnamed: 0,Title,Release Year,IMDB rating,Number of Votes,Genre's,Director's,Top 3 Cast Members,Box Office Gross,Runtime
0,The Dark Knight,2008.0,9.0,,"['Action Epic', 'Epic', 'Superhero', 'Tragedy'...","['Christopher Nolan', 'Christopher Nolan']","['Christian Bale', 'Heath Ledger', 'Aaron Eckh...",1012558979,2h 32m
1,The Godfather Part II,1974.0,9.0,,"['Epic', 'Gangster', 'Tragedy', 'Crime', 'Drama']","['Francis Ford Coppola', 'Francis Ford Coppola']","['Al Pacino', 'Robert De Niro', 'Robert Duvall']",47983449,3h 22m
2,12 Angry Men,1957.0,9.0,,"['Legal Drama', 'Psychological Drama', 'Crime'...","['Sidney Lumet', 'Sidney Lumet']","['Henry Fonda', 'Lee J. Cobb', 'Martin Balsam']",2945,1h 36m
3,The Godfather,1972.0,9.2,,"['Epic', 'Gangster', 'Tragedy', 'Crime', 'Drama']","['Francis Ford Coppola', 'Francis Ford Coppola']","['Marlon Brando', 'Al Pacino', 'James Caan']",250342198,2h 55m
4,The Shawshank Redemption,1994.0,9.3,,"['Epic', 'Period Drama', 'Prison Drama', 'Drama']","['Frank Darabont', 'Frank Darabont']","['Tim Robbins', 'Morgan Freeman', 'Bob Gunton']",29332836,2h 22m
...,...,...,...,...,...,...,...,...,...
216,,,,,"['Action', 'Adventure', 'Animation', 'Biograph...",[],[],0,
217,Amores Perros,2000.0,8.0,,"['Tragedy', 'Drama', 'Thriller']","['Alejandro G. IГ±ГЎrritu', 'Alejandro G. IГ±Г...","['Emilio EchevarrГ­a', 'Gael GarcГ­a Bernal', ...",20908467,2h 34m
218,,,,,"['Action', 'Adventure', 'Animation', 'Biograph...",[],[],0,
219,,,,,"['Action', 'Adventure', 'Animation', 'Biograph...",[],[],0,


In [None]:
data1 = {"asd":[124,542,478]}
df_data1 = pd.DataFrame(data1)
df_data1
data2 = {"asd":[785,542,478]}
df_data2 = pd.DataFrame(data2)
df_data2
pd.merge(df_data1, df_data2, how='outer')

Unnamed: 0,asd
0,124
1,478
2,542
3,785


In [31]:
# Example DataFrames
df1 = pd.DataFrame({'Key': [1, 2, 3], 'Value1': ['A', 'B', 'C']})
df2 = pd.DataFrame({'Key': [3, 4, 5], 'Value2': ['X', 'Y', 'Z']})

# Outer join
result = pd.merge(df1, df2, on='Key', how='outer')
print(result)

   Key Value1 Value2
0    1      A    NaN
1    2      B    NaN
2    3      C      X
3    4    NaN      Y
4    5    NaN      Z


In [None]:
from datetime import datetime

def validate_datetime(date_string, format):
    try:
        datetime.strptime(date_string, format)
        return True
    except ValueError:
        return False

# Example usage
date_string = "2023-10-15"
format = "%Y-%m-%d"
is_valid = validate_datetime(date_string, format)
# The difference between df.loc[] and df.iloc[]:
# - df.loc[] is label-based indexing, which means you use row and column labels to access data.
# - df.iloc[] is integer-based indexing, which means you use row and column positions (integer indices) to access data.

# Example using the 'result' DataFrame:
# Using df.loc[] to access rows by label
row_by_label = result.loc[2]  # Access row with index label 2
print("Row by label (loc):")
print(row_by_label)

# Using df.iloc[] to access rows by position
row_by_position = result.iloc[2]  # Access the third row (index position 2)
print("\nRow by position (iloc):")
print(row_by_position)

In [4]:
df_file.groupby('Title').mean()

TypeError: agg function failed [how->mean,dtype->object]

In [5]:
df1.to_numpy()

array([['123']], dtype=object)

In [19]:
df_file.iloc['Title':'IMDB rating':10]

TypeError: cannot do positional indexing on RangeIndex with these indexers [Title] of type str

In [None]:
The `np.where()` function is used in a Pandas DataFrame to create conditional logic for assigning values. It allows you to apply conditions and return values based on whether the condition is `True` or `False`. This is particularly useful for creating new columns or modifying existing ones based on specific criteria.

For example:
```python
df['New_Column'] = np.where(df['Age'] > 30, 'Senior', 'Junior')
```
In this case, a new column `New_Column` is created where the value is `'Senior'` if the `Age` is greater than 30, otherwise `'Junior'`.

(array([  0,   0,   0, ..., 220, 220, 220], shape=(1980,)),
 array([0, 1, 2, ..., 5, 6, 8], shape=(1980,)))

AttributeError: module 'pandas' has no attribute 'Dataframe'