In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

# Function to calculate RSI
def calculate_rsi(data, window=14):
    delta = data['Close'].diff(1)
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    
    return rsi

# Function to calculate MACD
def calculate_macd(data, short_window=12, long_window=26, signal_window=9):
    short_ema = data['Close'].ewm(span=short_window, adjust=False).mean()
    long_ema = data['Close'].ewm(span=long_window, adjust=False).mean()
    
    macd = short_ema - long_ema
    signal_line = macd.ewm(span=signal_window, adjust=False).mean()
    
    return macd, signal_line

def fetch_and_analyze_etf_data(ticker):
    asset = yf.Ticker(ticker)
    end_date = datetime.today().strftime('%Y-%m-%d')
    start_date = (datetime.today() - timedelta(days=730)).strftime('%Y-%m-%d')
    data = asset.history(start=start_date, end=end_date)
    data.reset_index(inplace=True)
    data['rsi'] = calculate_rsi(data)
    data['macd'], data['signal_line'] = calculate_macd(data)
    return data[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'rsi', 'macd', 'signal_line']]

etf_data = fetch_and_analyze_etf_data("EWJ")
etf_data.to_csv("etf_momentum_analysis.csv", index=False)


In [2]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

def fetch_index_data(tickers):
    all_data = []
    for ticker, name in tickers.items():
        asset = yf.Ticker(ticker)
        end_date = datetime.today().strftime('%Y-%m-%d')
        start_date = (datetime.today() - timedelta(days=730)).strftime('%Y-%m-%d')
        data = asset.history(start=start_date, end=end_date)
        data.reset_index(inplace=True)
        data['index_name'] = name
        all_data.append(data)
    return pd.concat(all_data, ignore_index=True)

tickers = {
    "^N225": "Nikkei 225",
    "^JPN": "Japan Index"
}

index_data = fetch_index_data(tickers)
index_data.to_csv("japan_index_prices.csv", index=False)


In [8]:
import yfinance as yf
import pandas as pd
import numpy as np
import datetime
import warnings
warnings.filterwarnings('ignore')

def get_market_volatility(window=20):  # 20-day rolling volatility by default
    # Calculate date range (adding extra days to account for the rolling window)
    end_date = datetime.datetime.now()
    start_date = end_date - datetime.timedelta(days=730 + window)
    
    # Fetch NYSE data
    nya = yf.download('^NYA', start=start_date, end=end_date)
    
    # Calculate daily returns
    nya['Returns'] = nya['Adj Close'].pct_change()
    
    # Calculate rolling volatility (annualized)
    nya['Volatility'] = nya['Returns'].rolling(window=window).std() * np.sqrt(252)
    
    # Format data
    vol_data = nya[['Volatility']].copy()
    vol_data = vol_data.dropna()  # Remove NaN values from the beginning
    vol_data.index = vol_data.index.strftime('%Y-%m-%d')
    
    # Save to CSV
    vol_data.to_csv('NYSE.market.vol.csv')
    print(f"Market {window}-day rolling volatility data saved to NYSE.market.vol.csv")
    
    return vol_data

def get_macro_indicators():
    # Set up dates
    end_date = datetime.datetime.now()
    start_date = end_date - datetime.timedelta(days=730)
    
    # Define tickers for macro data
    tickers = {
        'US_10Y_YIELD': '^TNX',  # US 10-Year Treasury Yield
        'JPN_10Y_YIELD': '^JPTBY',  # Japan 10-Year Bond Yield
        'USD_JPY': 'JPY=X'  # USD/JPY Exchange Rate
    }
    
    # Initialize empty DataFrame
    macro_data = pd.DataFrame()
    
    # Fetch data for each indicator
    for name, ticker in tickers.items():
        try:
            data = yf.download(ticker, start=start_date, end=end_date)['Close']
            macro_data[name] = data
        except Exception as e:
            print(f"Error fetching {name}: {e}")
    
    # Clean and format data
    macro_data = macro_data.fillna(method='ffill')
    macro_data.index = macro_data.index.strftime('%Y-%m-%d')
    
    # Save to CSV
    macro_data.to_csv('US.JPN.macro.indicators.csv')
    print("Macro indicators data saved to US.JPN.macro.indicators.csv")
    
    return macro_data

if __name__ == "__main__":
    print("Starting data collection...")
    vol_data = get_market_volatility()
    macro_data = get_macro_indicators()
    print("Data collection complete!")
    
    # Print the last 5 days of data as a sample
    print("\nLast 5 days of market volatility:")
    print(vol_data.tail())
    print("\nLast 5 days of macro indicators:")
    print(macro_data.tail())

[*********************100%***********************]  1 of 1 completed

Starting data collection...
Market 20-day rolling volatility data saved to NYSE.market.vol.csv



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['^JPTBY']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')
[*********************100%***********************]  1 of 1 completed

Macro indicators data saved to US.JPN.macro.indicators.csv
Data collection complete!

Last 5 days of market volatility:
Price      Volatility
Ticker               
Date                 
2024-11-08   0.107980
2024-11-11   0.107337
2024-11-12   0.108580
2024-11-13   0.105402
2024-11-14   0.105421

Last 5 days of macro indicators:
            US_10Y_YIELD  JPN_10Y_YIELD     USD_JPY
Date                                               
2024-11-08         4.306            NaN  153.179993
2024-11-11         4.308            NaN  152.904007
2024-11-12         4.432            NaN  153.479996
2024-11-13         4.451            NaN  154.654999
2024-11-14         4.414            NaN  156.358994





In [17]:
import pandas as pd
import requests
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

def get_major_earthquakes_japan():
    # Calculate date range for the last 2 years (730 days)
    end_date = datetime.now()
    start_date = end_date - timedelta(days=730)
    
    # Fetch earthquake data from the USGS API
    print("Fetching major earthquake data (M7.0+)...")
    base_url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
    
    params = {
        'format': 'geojson',
        'starttime': start_date.strftime('%Y-%m-%d'),
        'endtime': end_date.strftime('%Y-%m-%d'),
        'minlatitude': 30,   # Latitude range for Japan
        'maxlatitude': 46,
        'minlongitude': 129, # Longitude range for Japan
        'maxlongitude': 146,
        'minmagnitude': 7.0  # Only consider major earthquakes (M7.0 and above)
    }
    
    try:
        response = requests.get(base_url, params=params)
        if response.status_code != 200:
            raise Exception(f"API returned status code {response.status_code}")
        
        eq_data = response.json()
        
        earthquakes = []
        for feature in eq_data['features']:
            eq = {
                'date': datetime.fromtimestamp(feature['properties']['time'] / 1000).strftime('%Y-%m-%d'),
                'magnitude': feature['properties']['mag'],
                'place': feature['properties']['place']
            }
            earthquakes.append(eq)
        
        # Convert the earthquake data into a DataFrame
        eq_df = pd.DataFrame(earthquakes)
        
    except Exception as e:
        print(f"Error fetching earthquake data: {e}")
        eq_df = pd.DataFrame(columns=['date', 'magnitude', 'place'])
    
    # Format and clean the DataFrame
    eq_df['date'] = pd.to_datetime(eq_df['date'])
    eq_df = eq_df.sort_values(by='date')
    
    # Save to CSV
    output_file = 'Japan_major_earthquakes_last_730_days.csv'
    eq_df.to_csv(output_file, index=False)
    
    print(f"\nData saved to {output_file}")
    print("\nLast 5 major earthquakes (M7.0+):")
    print(eq_df.tail())
    
    return eq_df

if __name__ == "__main__":
    print("Starting earthquake data collection for M7.0+...")
    earthquake_data = get_major_earthquakes_japan()
    print("\nData collection complete!")

## This came out as only 2 observations, thus we left it alone. Key for the poster to talk about how overcame this issue.


Starting earthquake data collection for M7.0+...
Fetching major earthquake data (M7.0+)...

Data saved to Japan_major_earthquakes_last_730_days.csv

Last 5 major earthquakes (M7.0+):
        date  magnitude                                  place
1 2024-01-01        7.5  2024 Noto Peninsula, Japan Earthquake
0 2024-08-08        7.1   2024 Hyuganada Sea, Japan Earthquake

Data collection complete!
