In [None]:
import numpy as np
import pandas as pd

### Creating DataFrame

In [None]:
# using lists
price_data = [[100,200,400,600],
              [100,200,400,600],
              [100,200,400,600],
              [100,200,400,600]]

list_df = pd.DataFrame(price_data)
type(list_df)

In [None]:
# using dicts


price_data = {"Name":["Apple","Microsoft","Tesla","Meta"],
              "Open":[100,200,400,600],
              "High":[100,200,400,600],
              "Low":[100,200,400,600],
              "Close":[100,200,400,600]}
dict_df = pd.DataFrame(price_data)
dict_df.set_index("Name",inplace=True)

In [None]:
dict_df

In [None]:
# using read_csv
algo_df = pd.read_csv("algotrading_combined_dataset.csv")
algo_df

In [None]:
funda_df = pd.read_csv("fundamental_research_dataset_large.csv")
funda_df

### DataFrame Attributes and Methods

In [None]:
# shape
algo_df.shape
funda_df.shape

In [None]:
# dtypes

algo_df.dtypes
funda_df.dtypes

In [None]:
# index
algo_df.index
funda_df.index

In [None]:
# columns
algo_df.columns
funda_df.columns

In [None]:
# values
algo_df.values
funda_df.values

In [None]:
# head and tail
algo_df.tail(20)

In [None]:
# sample
algo_df.sample(10)

In [None]:
# info
algo_df.info()
funda_df.info()

In [None]:
# describe
algo_df.describe()

In [None]:
# isnull
algo_df.isnull().sum()
funda_df.isnull().sum()

In [None]:
# duplicated
algo_df.duplicated().sum()
funda_df.duplicated().sum()

In [None]:
# rename
algo_df.rename(columns={'High':'Max','Low':'Min'},inplace=True)

### Math Methods

In [None]:
# sum -> axis argument
algo_df.sum()

In [None]:
funda_df.sum()

In [None]:
algo_df["High"].mean()

In [None]:
funda_df["Market Cap (B)"].var()

### Selecting cols from a DataFrame

In [None]:
# single cols
algo_df["High"]



In [None]:
# multiple cols
funda_df[["Ticker","Sector","Market Cap (B)"]]


### Selecting rows from a DataFrame

- **iloc** - searches using index positions
- **loc** - searches using index labels

In [None]:
# single row

algo_df

In [None]:
# Single row
algo_df.iloc[0]

In [None]:
# Multiple
algo_df.iloc[0:10:2]

In [None]:
# fancy indexing
algo_df.iloc[[1,4,7,9,10,13,1300]]

In [407]:
# loc

funda_df.loc[["AAPL","GOOGL","MSFT"]]

Unnamed: 0_level_0,Company Name,Sector,Industry,Market Cap (B),EPS,P/E Ratio,Dividend Yield (%),P/B Ratio,D/E Ratio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,Apple Inc.,Finance,Banks,173.47,9.40,40.14,2.83,11.32,1.21
AAPL,Apple Inc.,Consumer Goods,Food Products,225.94,,30.86,6.19,18.69,0.49
AAPL,Apple Inc.,Finance,Investment Services,308.99,7.63,10.92,7.97,2.47,1.22
AAPL,Apple Inc.,Utilities,Water Utilities,273.18,-8.76,44.64,1.42,13.51,1.17
AAPL,Apple Inc.,Healthcare,Pharmaceuticals,171.88,,7.38,3.65,1.04,0.31
...,...,...,...,...,...,...,...,...,...
MSFT,Microsoft Corporation,Energy,Renewable Energy,170.29,-4.32,38.12,8.37,14.35,1.92
MSFT,Microsoft Corporation,Consumer Goods,Beverages,53.79,,,9.54,,2.03
MSFT,Microsoft Corporation,Healthcare,Biotechnology,277.95,0.22,21.39,4.47,2.34,2.50
MSFT,Microsoft Corporation,Finance,Banks,202.65,9.33,25.10,9.28,6.05,1.20


In [409]:
funda_df.iloc[0:10:2]

Unnamed: 0_level_0,Company Name,Sector,Industry,Market Cap (B),EPS,P/E Ratio,Dividend Yield (%),P/B Ratio,D/E Ratio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,Apple Inc.,Finance,Banks,173.47,9.4,40.14,2.83,11.32,1.21
MSFT,Microsoft Corporation,Utilities,Gas Utilities,38.24,8.6,22.84,9.22,1.61,2.15
TSLA,"Tesla, Inc.",Finance,Banks,147.96,-9.68,40.41,3.11,,0.12
BRK.B,Berkshire Hathaway Inc.,Energy,Oil & Gas,484.27,8.97,13.7,9.01,2.92,
JPM,JPMorgan Chase & Co.,Healthcare,Biotechnology,157.02,4.02,46.94,4.59,4.96,2.75


### Selecting both rows and cols

In [415]:
funda_df.iloc[0:5,0:4]

Unnamed: 0_level_0,Company Name,Sector,Industry,Market Cap (B)
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAPL,Apple Inc.,Finance,Banks,173.47
GOOGL,Alphabet Inc.,Finance,Banks,427.97
MSFT,Microsoft Corporation,Utilities,Gas Utilities,38.24
AMZN,"Amazon.com, Inc.",Consumer Goods,Beverages,
TSLA,"Tesla, Inc.",Finance,Banks,147.96


In [426]:
funda_df.loc["AAPL","Company Name":"Market Cap (B)"]

Unnamed: 0_level_0,Company Name,Sector,Industry,Market Cap (B)
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAPL,Apple Inc.,Finance,Banks,173.47
AAPL,Apple Inc.,Consumer Goods,Food Products,225.94
AAPL,Apple Inc.,Finance,Investment Services,308.99
AAPL,Apple Inc.,Utilities,Water Utilities,273.18
AAPL,Apple Inc.,Healthcare,Pharmaceuticals,171.88
...,...,...,...,...
AAPL,Apple Inc.,Consumer Goods,Beverages,
AAPL,Apple Inc.,Industrial Goods,Aerospace,80.87
AAPL,Apple Inc.,Industrial Goods,Aerospace,
AAPL,Apple Inc.,Utilities,Electric Utilities,319.65


In [440]:
algo_df.loc[0:5,"Date":"Close"]

Unnamed: 0,Date,Ticker,Open,High,Low,Close
0,2015-01-01,AAPL,101.031849,101.329723,98.852354,101.19741
1,2015-01-02,AAPL,100.866496,100.866496,99.568981,101.220064
2,2015-01-05,AAPL,101.164489,101.176821,100.920124,101.339872
3,2015-01-06,AAPL,,103.203859,102.287356,101.777905
4,2015-01-07,AAPL,102.744728,104.050309,102.211701,102.09031
5,2015-01-08,AAPL,101.311978,102.094773,100.520561,102.675189


### Filtering a DataFrame

### Adding new cols

In [441]:
# completely new
algo_df

Unnamed: 0,Date,Ticker,Open,High,Low,Close,Volume
0,2015-01-01,AAPL,101.031849,101.329723,98.852354,101.197410,344567.0
1,2015-01-02,AAPL,100.866496,100.866496,99.568981,101.220064,310521.0
2,2015-01-05,AAPL,101.164489,101.176821,100.920124,101.339872,
3,2015-01-06,AAPL,,103.203859,102.287356,101.777905,295025.0
4,2015-01-07,AAPL,102.744728,104.050309,102.211701,102.090310,439080.0
...,...,...,...,...,...,...,...
32865,2021-08-02,AMZN,184.443237,185.375089,183.176491,183.087397,581356.0
32866,2017-01-24,AAPL,92.780364,93.092012,91.238510,,754158.0
32867,2018-03-09,AMZN,98.074613,99.154029,96.203177,97.487616,76911.0
32868,2018-05-02,LTC/USD,103.443224,105.575739,103.159286,,552764.0


In [442]:
algo_df["New Col"] = "Kuldeep Singh"

In [443]:
algo_df

Unnamed: 0,Date,Ticker,Open,High,Low,Close,Volume,New Col
0,2015-01-01,AAPL,101.031849,101.329723,98.852354,101.197410,344567.0,Kuldeep Singh
1,2015-01-02,AAPL,100.866496,100.866496,99.568981,101.220064,310521.0,Kuldeep Singh
2,2015-01-05,AAPL,101.164489,101.176821,100.920124,101.339872,,Kuldeep Singh
3,2015-01-06,AAPL,,103.203859,102.287356,101.777905,295025.0,Kuldeep Singh
4,2015-01-07,AAPL,102.744728,104.050309,102.211701,102.090310,439080.0,Kuldeep Singh
...,...,...,...,...,...,...,...,...
32865,2021-08-02,AMZN,184.443237,185.375089,183.176491,183.087397,581356.0,Kuldeep Singh
32866,2017-01-24,AAPL,92.780364,93.092012,91.238510,,754158.0,Kuldeep Singh
32867,2018-03-09,AMZN,98.074613,99.154029,96.203177,97.487616,76911.0,Kuldeep Singh
32868,2018-05-02,LTC/USD,103.443224,105.575739,103.159286,,552764.0,Kuldeep Singh


In [445]:
# from existing ones


algo_df["New Close"] = algo_df["Close"] * 100

In [446]:
algo_df

Unnamed: 0,Date,Ticker,Open,High,Low,Close,Volume,New Col,New Close
0,2015-01-01,AAPL,101.031849,101.329723,98.852354,101.197410,344567.0,Kuldeep Singh,10119.741016
1,2015-01-02,AAPL,100.866496,100.866496,99.568981,101.220064,310521.0,Kuldeep Singh,10122.006445
2,2015-01-05,AAPL,101.164489,101.176821,100.920124,101.339872,,Kuldeep Singh,10133.987171
3,2015-01-06,AAPL,,103.203859,102.287356,101.777905,295025.0,Kuldeep Singh,10177.790504
4,2015-01-07,AAPL,102.744728,104.050309,102.211701,102.090310,439080.0,Kuldeep Singh,10209.031003
...,...,...,...,...,...,...,...,...,...
32865,2021-08-02,AMZN,184.443237,185.375089,183.176491,183.087397,581356.0,Kuldeep Singh,18308.739709
32866,2017-01-24,AAPL,92.780364,93.092012,91.238510,,754158.0,Kuldeep Singh,
32867,2018-03-09,AMZN,98.074613,99.154029,96.203177,97.487616,76911.0,Kuldeep Singh,9748.761555
32868,2018-05-02,LTC/USD,103.443224,105.575739,103.159286,,552764.0,Kuldeep Singh,


### Important DataFrame Functions

In [None]:
# astype


In [None]:
# value_counts

1. **Identify all the top-performing stocks based on their overall return from the start to the end of the dataset.**
   - **Hint:** Calculate the return as (final close price - initial close price) / initial close price.

2. **How many instances of daily price change greater than 5% (super over finishes) occurred for each stock?**
   - **Hint:** Use the `pct_change()` method on the 'Close' price to calculate daily percentage changes.

3. **Calculate the number of times each stock's price increased on a specific date range (e.g., within the first quarter of each year).**
   - **Hint:** Filter the dataset by date and then compare 'Close' and 'Open' prices.

4. **Calculate the percentage of times the opening price being higher than the previous day's closing price (toss winner) resulted in a higher closing price on that day (match winner).**
   - **Hint:** Use the `shift()` method to compare the opening price with the previous day's closing price.

5. **Identify all assets with a daily percentage change higher than 8% and a volume greater than 100,000.**
   - **Hint:** Apply conditions on 'Daily Change' and 'Volume' columns.

6. **Identify all stocks in the Technology sector with a daily percentage change higher than 7.5%.**
   - **Hint:** Merge the datasets on 'Ticker' and filter based on the sector and daily change.

7. **Write a function that can return the performance track record of two assets against each other over time.**
   - **Hint:** Create a function that merges the close prices of two tickers on the 'Date' column.

8. **Identify and remove duplicate rows from both datasets. How many duplicates were removed?**
   - **Hint:** Use the `duplicated()` method to find and remove duplicate rows.

9. **For the fundamental dataset, identify rows where the Market Cap is NaN, and impute these values using a sector-wise median Market Cap.**
   - **Hint:** Group by sector and fill NaN values with the sector median.

10. **Filter the combined dataset to find the top 5 most volatile stocks based on the standard deviation of daily percentage changes.**
    - **Hint:** Calculate the standard deviation of daily percentage changes for each stock and sort the results.

In [None]:
import pandas as pd
import numpy as np

# Function to create a synthetic dataset for a given asset type
def create_asset_data(tickers, start_date, end_date):
    dates = pd.date_range(start=start_date, end=end_date, freq='B')
    data = []
    for ticker in tickers:
        np.random.seed(42)  # For reproducibility
        price = np.random.randn(len(dates)).cumsum() + 100
        for i in range(len(dates)):
            open_price = price[i] + np.random.uniform(-1, 1)
            high_price = max(open_price, price[i] + np.random.uniform(0, 2))
            low_price = min(open_price, price[i] - np.random.uniform(0, 2))
            close_price = price[i] + np.random.uniform(-1, 1)
            volume = np.random.randint(1000, 1000000)
            # Introduce NaN values randomly
            if np.random.rand() < 0.05:
                open_price = np.nan
            if np.random.rand() < 0.05:
                high_price = np.nan
            if np.random.rand() < 0.05:
                low_price = np.nan
            if np.random.rand() < 0.05:
                close_price = np.nan
            if np.random.rand() < 0.05:
                volume = np.nan
            data.append([dates[i], ticker, open_price, high_price, low_price, close_price, volume])
    return pd.DataFrame(data, columns=['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume'])

# Define tickers for each asset type
stock_tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'TSLA']
forex_tickers = ['EUR/USD', 'GBP/USD', 'USD/JPY', 'AUD/USD', 'USD/CAD']
crypto_tickers = ['BTC/USD', 'ETH/USD', 'XRP/USD', 'LTC/USD', 'BCH/USD']

# Generate data for each asset type
stock_data = create_asset_data(stock_tickers, '2015-01-01', '2023-01-01')
forex_data = create_asset_data(forex_tickers, '2015-01-01', '2023-01-01')
crypto_data = create_asset_data(crypto_tickers, '2015-01-01', '2023-01-01')

# Combine all data into a single DataFrame
combined_data = pd.concat([stock_data, forex_data, crypto_data])

# Introduce duplicate rows
combined_data = pd.concat([combined_data, combined_data.sample(frac=0.05, random_state=42)])  # Add 5% duplicates

# Save to CSV
combined_data.to_csv('algotrading_combined_dataset.csv', index=False)

print("Dataset created and saved to 'algotrading_combined_dataset.csv'")


In [None]:
import pandas as pd
import numpy as np

# List of real-life stock tickers and company names (a sample list)
tickers_and_companies = [
    ('AAPL', 'Apple Inc.'),
    ('GOOGL', 'Alphabet Inc.'),
    ('MSFT', 'Microsoft Corporation'),
    ('AMZN', 'Amazon.com, Inc.'),
    ('TSLA', 'Tesla, Inc.'),
    ('FB', 'Meta Platforms, Inc.'),
    ('BRK.B', 'Berkshire Hathaway Inc.'),
    ('JNJ', 'Johnson & Johnson'),
    ('JPM', 'JPMorgan Chase & Co.'),
    ('V', 'Visa Inc.'),
    # Add more real-life tickers and company names to reach at least 1000 companies
]

# Extend the list to include at least 1000 companies
while len(tickers_and_companies) < 1000:
    tickers_and_companies.extend(tickers_and_companies[:1000 - len(tickers_and_companies)])

# Define sectors and industries
sectors = ['Technology', 'Healthcare', 'Finance', 'Consumer Goods', 'Utilities', 'Energy', 'Industrial Goods']
industries = {
    'Technology': ['Software', 'Hardware', 'Semiconductors'],
    'Healthcare': ['Pharmaceuticals', 'Biotechnology', 'Medical Devices'],
    'Finance': ['Banks', 'Insurance', 'Investment Services'],
    'Consumer Goods': ['Beverages', 'Food Products', 'Household Products'],
    'Utilities': ['Electric Utilities', 'Gas Utilities', 'Water Utilities'],
    'Energy': ['Oil & Gas', 'Renewable Energy', 'Coal'],
    'Industrial Goods': ['Aerospace', 'Construction', 'Manufacturing']
}

# Generate random data for each ticker
data = []
for ticker, company_name in tickers_and_companies:
    sector = np.random.choice(sectors)
    industry = np.random.choice(industries[sector])
    market_cap = round(np.random.uniform(0.1, 500), 2)  # in billions
    eps = round(np.random.uniform(-10, 10), 2)
    pe_ratio = round(np.random.uniform(5, 50), 2)
    dividend_yield = round(np.random.uniform(0, 10), 2)
    pb_ratio = round(np.random.uniform(0.1, 20), 2)
    de_ratio = round(np.random.uniform(0, 3), 2)

    # Introduce NaN values randomly
    if np.random.rand() < 0.1: market_cap = np.nan
    if np.random.rand() < 0.1: eps = np.nan
    if np.random.rand() < 0.1: pe_ratio = np.nan
    if np.random.rand() < 0.1: dividend_yield = np.nan
    if np.random.rand() < 0.1: pb_ratio = np.nan
    if np.random.rand() < 0.1: de_ratio = np.nan

    data.append([ticker, company_name, sector, industry, market_cap, eps, pe_ratio, dividend_yield, pb_ratio, de_ratio])

# Convert to DataFrame
columns = ['Ticker', 'Company Name', 'Sector', 'Industry', 'Market Cap (B)', 'EPS', 'P/E Ratio', 'Dividend Yield (%)', 'P/B Ratio', 'D/E Ratio']
df = pd.DataFrame(data, columns=columns)

# Introduce duplicate rows
df = pd.concat([df, df.sample(frac=0.05, random_state=42)])  # Add 5% duplicates

# Save to CSV
df.to_csv('fundamental_research_dataset_large.csv', index=False)

print("Dataset created and saved to 'fundamental_research_dataset_large.csv'")



### Dataset 1: Fundamental Research Dataset

This dataset contains fundamental data for real-life companies, with some rows containing NaN values and duplicates.

```python
import pandas as pd
import numpy as np

# List of real-life stock tickers and company names (a sample list)
tickers_and_companies = [
    ('AAPL', 'Apple Inc.'),
    ('GOOGL', 'Alphabet Inc.'),
    ('MSFT', 'Microsoft Corporation'),
    ('AMZN', 'Amazon.com, Inc.'),
    ('TSLA', 'Tesla, Inc.'),
    ('FB', 'Meta Platforms, Inc.'),
    ('BRK.B', 'Berkshire Hathaway Inc.'),
    ('JNJ', 'Johnson & Johnson'),
    ('JPM', 'JPMorgan Chase & Co.'),
    ('V', 'Visa Inc.'),
    # Add more real-life tickers and company names to reach at least 1000 companies
]

# Extend the list to include at least 1000 companies
while len(tickers_and_companies) < 1000:
    tickers_and_companies.extend(tickers_and_companies[:1000 - len(tickers_and_companies)])

# Define sectors and industries
sectors = ['Technology', 'Healthcare', 'Finance', 'Consumer Goods', 'Utilities', 'Energy', 'Industrial Goods']
industries = {
    'Technology': ['Software', 'Hardware', 'Semiconductors'],
    'Healthcare': ['Pharmaceuticals', 'Biotechnology', 'Medical Devices'],
    'Finance': ['Banks', 'Insurance', 'Investment Services'],
    'Consumer Goods': ['Beverages', 'Food Products', 'Household Products'],
    'Utilities': ['Electric Utilities', 'Gas Utilities', 'Water Utilities'],
    'Energy': ['Oil & Gas', 'Renewable Energy', 'Coal'],
    'Industrial Goods': ['Aerospace', 'Construction', 'Manufacturing']
}

# Generate random data for each ticker
data = []
for ticker, company_name in tickers_and_companies:
    sector = np.random.choice(sectors)
    industry = np.random.choice(industries[sector])
    market_cap = round(np.random.uniform(0.1, 500), 2)  # in billions
    eps = round(np.random.uniform(-10, 10), 2)
    pe_ratio = round(np.random.uniform(5, 50), 2)
    dividend_yield = round(np.random.uniform(0, 10), 2)
    pb_ratio = round(np.random.uniform(0.1, 20), 2)
    de_ratio = round(np.random.uniform(0, 3), 2)

    # Introduce NaN values randomly
    if np.random.rand() < 0.1: market_cap = np.nan
    if np.random.rand() < 0.1: eps = np.nan
    if np.random.rand() < 0.1: pe_ratio = np.nan
    if np.random.rand() < 0.1: dividend_yield = np.nan
    if np.random.rand() < 0.1: pb_ratio = np.nan
    if np.random.rand() < 0.1: de_ratio = np.nan

    data.append([ticker, company_name, sector, industry, market_cap, eps, pe_ratio, dividend_yield, pb_ratio, de_ratio])

# Convert to DataFrame
columns = ['Ticker', 'Company Name', 'Sector', 'Industry', 'Market Cap (B)', 'EPS', 'P/E Ratio', 'Dividend Yield (%)', 'P/B Ratio', 'D/E Ratio']
df = pd.DataFrame(data, columns=columns)

# Introduce duplicate rows
df = pd.concat([df, df.sample(frac=0.05, random_state=42)])  # Add 5% duplicates

# Save to CSV
df.to_csv('fundamental_research_dataset_large.csv', index=False)

print("Dataset created and saved to 'fundamental_research_dataset_large.csv'")
```

### Dataset 2: Combined Asset Data

This dataset includes stock, Forex, and cryptocurrency data with NaN values and duplicates.

```python
import pandas as pd
import numpy as np

# Function to create a synthetic dataset for a given asset type
def create_asset_data(tickers, start_date, end_date):
    dates = pd.date_range(start=start_date, end=end_date, freq='B')
    data = []
    for ticker in tickers:
        np.random.seed(42)  # For reproducibility
        price = np.random.randn(len(dates)).cumsum() + 100
        for i in range(len(dates)):
            open_price = price[i] + np.random.uniform(-1, 1)
            high_price = max(open_price, price[i] + np.random.uniform(0, 2))
            low_price = min(open_price, price[i] - np.random.uniform(0, 2))
            close_price = price[i] + np.random.uniform(-1, 1)
            volume = np.random.randint(1000, 1000000)
            # Introduce NaN values randomly
            if np.random.rand() < 0.05:
                open_price = np.nan
            if np.random.rand() < 0.05:
                high_price = np.nan
            if np.random.rand() < 0.05:
                low_price = np.nan
            if np.random.rand() < 0.05:
                close_price = np.nan
            if np.random.rand() < 0.05:
                volume = np.nan
            data.append([dates[i], ticker, open_price, high_price, low_price, close_price, volume])
    return pd.DataFrame(data, columns=['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume'])

# Define tickers for each asset type
stock_tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'TSLA']
forex_tickers = ['EUR/USD', 'GBP/USD', 'USD/JPY', 'AUD/USD', 'USD/CAD']
crypto_tickers = ['BTC/USD', 'ETH/USD', 'XRP/USD', 'LTC/USD', 'BCH/USD']

# Generate data for each asset type
stock_data = create_asset_data(stock_tickers, '2015-01-01', '2023-01-01')
forex_data = create_asset_data(forex_tickers, '2015-01-01', '2023-01-01')
crypto_data = create_asset_data(crypto_tickers, '2015-01-01', '2023-01-01')

# Combine all data into a single DataFrame
combined_data = pd.concat([stock_data, forex_data, crypto_data])

# Introduce duplicate rows
combined_data = pd.concat([combined_data, combined_data.sample(frac=0.05, random_state=42)])  # Add 5% duplicates

# Save to CSV
combined_data.to_csv('algotrading_combined_dataset.csv', index=False)

print("Dataset created and saved to 'algotrading_combined_dataset.csv'")
```

### Practice Questions

1. **Data Cleaning**:
   - Identify and remove duplicate rows from both datasets.
   - Handle NaN values appropriately (e.g., fill with mean/median, forward fill, backward fill, or remove).

2. **Data Analysis**:
   - Calculate the average Market Cap, P/E Ratio, and Dividend Yield for each sector in the fundamental research dataset.
   - Calculate the daily percentage change for each asset in the combined dataset.

3. **Data Visualization**:
   - Plot the distribution of Market Cap values for different sectors.
   - Visualize the price trends for a selected stock, Forex pair, and cryptocurrency over time.

4. **Advanced Analysis**:
   - Perform a sector-wise analysis to find which sectors have the highest and lowest average EPS in the fundamental research dataset.
   - Analyze the correlation between different Forex pairs in the combined dataset.

5. **Algorithmic Trading**:
   - Implement a simple moving average crossover strategy using the combined dataset.
   - Develop a fundamental analysis-based scoring system to rank stocks in the fundamental research dataset based on their financial metrics.

These questions and tasks should help you get hands-on experience with real-life algorithmic trading scenarios and data handling techniques.