In [33]:
import random
import pandas as pd
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
import numpy as np
import plotly.graph_objects as go
import yfinance as yf
from datetime import datetime
from fuzzywuzzy import fuzz
import gradio as gr
from datetime import date, timedelta


Artificial News Generation

In [34]:


# Companies whose names are substituted into the headline templates below.
companies = ["Reliance Industries", "Tata Consultancy Services", "HDFC Bank", "Infosys", "ICICI Bank", "Hindustan Unilever", "Bajaj Finance", "Kotak Mahindra Bank", "HDFC", "Bharti Airtel", "Axis Bank", "Maruti Suzuki", "Asian Paints", "Nestle India", "UltraTech Cement", "IndusInd Bank", "Tech Mahindra", "ITC", "Sun Pharmaceutical", "Power Grid Corporation of India"]

# Headline templates; "{}" is replaced by a company name.
positive_news = [
    "Shares of {} surged today after the company announced record-breaking quarterly profits.",
    "Investors cheered as {} reported strong growth in its revenue and market share.",
    "{} stock jumped significantly following positive market sentiment and strong quarterly earnings.",
    "Analysts upgraded the rating for {} after the company exceeded earnings expectations.",
    "Market optimism propelled {} to new highs as investors remain bullish on its growth prospects."
]

negative_news = [
    "Investor confidence wavered as {} reported a decline in quarterly earnings.",
    "Market volatility led to a sharp decline in {} stock prices despite the company's efforts to streamline operations.",
    "Concerns over economic slowdown weighed heavily on {} shares, leading to a significant drop in value.",
    "Investor sentiment soured after {} issued a profit warning, citing challenges in the current market environment.",
    "Rising competition and regulatory challenges contributed to a downturn in {} stock performance."
]

neutral_news = [
    "{} maintained steady performance in today's trading session amid mixed market conditions.",
    "Market analysts remain neutral on {} stock, awaiting further developments in the sector.",
    "Investor sentiment towards {} remained unchanged as the company continues to navigate market challenges.",
    "Shares of {} showed minimal movement today as investors adopted a wait-and-see approach.",
    "{} stock experienced moderate trading activity today, reflecting overall market stability."
]

# Templates keyed by tone so the generator can look them up instead of branching.
NEWS_TEMPLATES = {
    "positive": positive_news,
    "negative": negative_news,
    "neutral": neutral_news,
}

# Fixed seed: without one, every kernel restart produced different headlines,
# so the outputs shown in the notebook could not be reproduced.
NEWS_SEED = 42
NUM_ARTICLES = 20

# A private generator makes the headlines reproducible without reseeding the
# global `random` state that other cells may rely on.
news_rng = random.Random(NEWS_SEED)

news_articles = []

for _ in range(NUM_ARTICLES):
    # Draw order (company, tone, template) is kept as in the original loop.
    company = news_rng.choice(companies)
    tone = news_rng.choice(["positive", "negative", "neutral"])
    news = news_rng.choice(NEWS_TEMPLATES[tone]).format(company)
    news_articles.append(news)

print(news_articles)


['Rising competition and regulatory challenges contributed to a downturn in Maruti Suzuki stock performance.', 'Market analysts remain neutral on Maruti Suzuki stock, awaiting further developments in the sector.', 'Tata Consultancy Services stock experienced moderate trading activity today, reflecting overall market stability.', 'Shares of ITC showed minimal movement today as investors adopted a wait-and-see approach.', 'Concerns over economic slowdown weighed heavily on Maruti Suzuki shares, leading to a significant drop in value.', 'Market analysts remain neutral on Asian Paints stock, awaiting further developments in the sector.', 'Tech Mahindra stock experienced moderate trading activity today, reflecting overall market stability.', 'Analysts upgraded the rating for Maruti Suzuki after the company exceeded earnings expectations.', 'Investor sentiment towards Bharti Airtel remained unchanged as the company continues to navigate market challenges.', 'Investor confidence wavered as In

In [35]:
# Peek at the first generated headline.
news_articles[0]

'Rising competition and regulatory challenges contributed to a downturn in Maruti Suzuki stock performance.'

In [36]:
# One row per generated headline, held in a single 'News' column.
news = pd.DataFrame({'News': news_articles})
news

Unnamed: 0,News
0,Rising competition and regulatory challenges c...
1,Market analysts remain neutral on Maruti Suzuk...
2,Tata Consultancy Services stock experienced mo...
3,Shares of ITC showed minimal movement today as...
4,Concerns over economic slowdown weighed heavil...
5,Market analysts remain neutral on Asian Paints...
6,Tech Mahindra stock experienced moderate tradi...
7,Analysts upgraded the rating for Maruti Suzuki...
8,Investor sentiment towards Bharti Airtel remai...
9,Investor confidence wavered as Infosys reporte...


Entity Recognition

In [37]:
def _merge_wordpieces(ner_results):
    """Join token-level NER predictions into whole entity strings.

    A ``B-`` tag starts a new entity, an ``I-`` tag extends the current one
    (or starts one if none is open), and any other tag closes the current
    entity. Pieces prefixed with ``##`` are glued onto the preceding piece.

    Args:
        ner_results: Iterable of dicts with at least ``'entity'`` and ``'word'``.

    Returns:
        List of entity strings in the order they were found.
    """
    merged = []
    current = ""
    for token in ner_results:
        prefix = token['entity'][:2]
        if prefix == 'B-':
            if current:
                merged.append(current)
            current = token['word']
        elif prefix == 'I-':
            current = f"{current} {token['word']}" if current else token['word']
        elif current:
            merged.append(current)
            current = ""
    if current:
        merged.append(current)
    # Pieces were joined with a space above; remove it in front of "##" pieces.
    return [name.replace(" ##", "") for name in merged]


def extract_entities(df):
    """Add an ``'entity'`` column holding the first entity found in each headline.

    The previous version looped ``for i in range(len(entities[0]))`` and
    overwrote ``df['entity']`` on every pass. The column therefore depended on
    how many entities the FIRST headline had: it held the last of those
    positions, was never created when the first headline had none, and an
    empty frame raised ``IndexError``. The column is now always the first
    entity of each row, or ``None`` when nothing was recognised.

    Args:
        df: DataFrame with a ``'News'`` column of headline strings. It is
            modified in place.

    Returns:
        The same ``df`` object with the ``'entity'`` column added.

    Raises:
        KeyError: if ``df`` has no ``'News'`` column.
    """
    headlines = df['News'].tolist()

    entities = []
    if headlines:
        # Only load the model when there is something to tag.
        tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
        model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
        nlp = pipeline("ner", model=model, tokenizer=tokenizer)
        entities = [_merge_wordpieces(nlp(headline)) for headline in headlines]

    df['entity'] = [found[0] if found else None for found in entities]
    return df


In [38]:
# Tag each headline with the entity found by the NER model.
# extract_entities() adds the column to the frame it is given and returns that
# same frame, so `df1` and `news` refer to one object after this line.
df1 = extract_entities(news)
df1

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Unnamed: 0,News,entity
0,Rising competition and regulatory challenges c...,Maruti Suzuki
1,Market analysts remain neutral on Maruti Suzuk...,Maruti Suzuki
2,Tata Consultancy Services stock experienced mo...,Tata Consultancy Services
3,Shares of ITC showed minimal movement today as...,ITC
4,Concerns over economic slowdown weighed heavil...,Maruti Suzuki
5,Market analysts remain neutral on Asian Paints...,Asian Paints
6,Tech Mahindra stock experienced moderate tradi...,Tech Mahindra
7,Analysts upgraded the rating for Maruti Suzuki...,Maruti Suzuki
8,Investor sentiment towards Bharti Airtel remai...,Bharti Airtel
9,Investor confidence wavered as Infosys reporte...,Infosys


SENTIMENT SCORING

In [39]:
# The classifier and its tokenizer are loaded from the same checkpoint.
FINBERT_CHECKPOINT = 'yiyanghkust/finbert-tone'
tokenizer = BertTokenizer.from_pretrained(FINBERT_CHECKPOINT)
finbert = BertForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT, num_labels=3)

In [40]:
# Module-level text classifier; add_sentiment_column() reads this global `nlp`.
nlp = pipeline("text-classification", model=finbert, tokenizer=tokenizer)

In [41]:
def add_sentiment_column(df):
    """Score every headline with the global ``nlp`` classifier.

    Writes ``'Sentiment'`` (label) and ``'Sentiment Score'`` (score) onto the
    frame that was passed in, then returns a four-column selection of it.

    Args:
        df: DataFrame with ``'News'`` and ``'entity'`` columns.

    Returns:
        A frame with the columns ``News, entity, Sentiment, Sentiment Score``
        in that order.
    """
    predictions = nlp(df['News'].tolist())

    # Column assignment happens on the caller's frame, so it sees both columns too.
    df['Sentiment'] = [prediction['label'] for prediction in predictions]
    df['Sentiment Score'] = [prediction['score'] for prediction in predictions]

    ordered_columns = ['News', 'entity', 'Sentiment', 'Sentiment Score']
    return df[ordered_columns]

In [42]:
# The returned, reordered frame is only displayed here. df1 itself still gains
# the two sentiment columns because the function assigns them on its argument.
add_sentiment_column(df1)

Unnamed: 0,News,entity,Sentiment,Sentiment Score
0,Rising competition and regulatory challenges c...,Maruti Suzuki,Negative,1.0
1,Market analysts remain neutral on Maruti Suzuk...,Maruti Suzuki,Neutral,0.875465
2,Tata Consultancy Services stock experienced mo...,Tata Consultancy Services,Positive,0.999515
3,Shares of ITC showed minimal movement today as...,ITC,Neutral,0.995153
4,Concerns over economic slowdown weighed heavil...,Maruti Suzuki,Negative,1.0
5,Market analysts remain neutral on Asian Paints...,Asian Paints,Neutral,0.855122
6,Tech Mahindra stock experienced moderate tradi...,Tech Mahindra,Positive,0.995595
7,Analysts upgraded the rating for Maruti Suzuki...,Maruti Suzuki,Positive,0.999999
8,Investor sentiment towards Bharti Airtel remai...,Bharti Airtel,Negative,0.927039
9,Investor confidence wavered as Infosys reporte...,Infosys,Negative,0.999996


CSV for fuzzy matching

In [43]:
# Lookup table used for fuzzy matching of entity names.
# NOTE(review): absolute Google Drive path — presumably needs Drive mounted in Colab; confirm before running elsewhere.
df=pd.read_csv('/content/drive/MyDrive/Stock_names.csv')
df

Unnamed: 0,Company Name,NSE,NSE CODE
0,Reliance Industries Ltd.,RELIANCE.NS,RELIANCE
1,Tata Consultancy Services Ltd.,TCS.NS,TCS
2,HDFC Bank Ltd.,HDFCBANK.NS,HDFCBANK
3,Infosys Ltd.,INFY.NS,INFY
4,Hindustan Unilever Ltd.,HINDUNILVR.NS,HINDUNILVR
...,...,...,...
1236,Consolidated Finvest & Holdings Ltd.,CONSOFINVT.NS,CONSOFINVT
1237,Hindustan Media Ventures Ltd.,HMVL.NS,HMVL
1238,Finkurve Financial Services Ltd.,.NS,
1239,Asian Energy Services Ltd.,ASIANENE.NS,ASIANENE


In [44]:
# Columns to be dropped
columns_to_drop = ['NSE CODE']

# Drop specified columns.
# errors='ignore' keeps this cell re-runnable: `df` is rebound to the result,
# so a second run no longer finds the column and previously raised KeyError.
df = df.drop(columns=columns_to_drop, errors='ignore')

In [45]:
# Old header -> new header; labels absent from the frame are left untouched.
new_col = {
    'Company Name': 'Companies',
    'NSE': 'Symbols',
}
df = df.rename(columns=new_col)

In [46]:
# Display the lookup table after the drop and rename steps.
df

Unnamed: 0,Companies,Symbols
0,Reliance Industries Ltd.,RELIANCE.NS
1,Tata Consultancy Services Ltd.,TCS.NS
2,HDFC Bank Ltd.,HDFCBANK.NS
3,Infosys Ltd.,INFY.NS
4,Hindustan Unilever Ltd.,HINDUNILVR.NS
...,...,...
1236,Consolidated Finvest & Holdings Ltd.,CONSOFINVT.NS
1237,Hindustan Media Ventures Ltd.,HMVL.NS
1238,Finkurve Financial Services Ltd.,.NS
1239,Asian Energy Services Ltd.,ASIANENE.NS


Extraction of data from yfinance in one go

Swing strategy in candlestick format

In [54]:
def fetch_data_for_symbol(symbol, interval, n_days):
    """Download price history for one symbol covering the last ``n_days`` days.

    Prints a progress line, then asks ``yf.download`` for the range that
    starts ``n_days`` before today and ends at today's date.

    Args:
        symbol: Ticker passed straight to ``yf.download``.
        interval: Bar interval passed straight to ``yf.download``.
        n_days: Number of days to look back from today.

    Returns:
        Whatever ``yf.download`` returns for that range.
    """
    print(f"Fetching data for: {symbol}, timeframe={interval}, last {n_days} days")

    day_format = "%Y-%m-%d"
    range_end = pd.Timestamp.today().strftime(day_format)
    range_start = (pd.Timestamp.today() - pd.DateOffset(days=n_days)).strftime(day_format)

    return yf.download(symbol, start=range_start, end=range_end, interval=interval)

# Step 1: Calculate the EMA
def calculate_ema(prices, window):
    """Return the exponential moving average of ``prices`` as a NumPy array.

    Uses the recursive form ``ema[t] = alpha * price[t] + (1 - alpha) * ema[t-1]``
    with ``alpha = 2 / (window + 1)``, seeded with the first price.

    This replaces a convolution with a truncated exponential kernel that had
    three problems:

    * ``np.convolve`` flips the kernel, so the OLDEST price in each window
      received the largest weight;
    * the first ``window`` values were overwritten with ``ema[window]``, a
      value computed from later prices;
    * ``ema[window]`` raised ``IndexError`` unless there were more than
      ``window`` prices.

    Args:
        prices: 1-D sequence of prices (list, array or Series).
        window: EMA span; must be at least 1.

    Returns:
        ``np.ndarray`` of floats with the same length as ``prices``.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")
    # Go through a plain float array so the result is positional, not index-aligned.
    series = pd.Series(np.asarray(prices, dtype=float))
    return series.ewm(span=window, adjust=False).mean().to_numpy()

# Step 3: Generate Buy signals
def generate_buy_signals(ema_1st, ema_2nd):
    """Mark rows where ``ema_1st`` moves from at-or-below ``ema_2nd`` to above it.

    Args:
        ema_1st: Series of the first EMA.
        ema_2nd: Series of the second EMA.

    Returns:
        NumPy array of 1 (crossover on this row) or 0.
    """
    previously_not_above = ema_1st.shift(1) <= ema_2nd.shift(1)
    currently_above = ema_1st > ema_2nd
    crossed_up = previously_not_above & currently_above
    return np.where(crossed_up, 1, 0)

# Step 4: Generate Sell signals
def generate_sell_signals(ema_1st, ema_2nd):
    """Mark rows where ``ema_1st`` moves from at-or-above ``ema_2nd`` to below it.

    Args:
        ema_1st: Series of the first EMA.
        ema_2nd: Series of the second EMA.

    Returns:
        NumPy array of 1 (crossover on this row) or 0.
    """
    previously_not_below = ema_1st.shift(1) >= ema_2nd.shift(1)
    currently_below = ema_1st < ema_2nd
    crossed_down = previously_not_below & currently_below
    return np.where(crossed_down, 1, 0)

# Function for fuzzy matching
def fuzzy_matching(stock_name, stocks, min_ratio=0):
    """Return the value in ``stocks`` whose key best matches ``stock_name``.

    Keys are compared case-insensitively with ``fuzz.token_set_ratio``; the
    first key with the highest ratio wins.

    Args:
        stock_name: Name to look up. ``None``, non-string and blank values
            return ``None`` instead of raising ``AttributeError`` on
            ``.lower()``, as they did before.
        stocks: Mapping of company name to symbol. Non-string keys are skipped.
        min_ratio: Optional minimum ratio the best match must reach. The
            default of 0 keeps the original behaviour of accepting any
            non-zero match.

    Returns:
        The matched value, or ``None`` when nothing qualifies.
    """
    if not isinstance(stock_name, str) or not stock_name.strip():
        return None

    needle = stock_name.lower()
    best_match = None
    best_match_ratio = 0

    for key in stocks.keys():
        if not isinstance(key, str):
            continue
        ratio = fuzz.token_set_ratio(needle, key.lower())
        if ratio > best_match_ratio:
            best_match_ratio = ratio
            best_match = key

    if best_match is None or best_match_ratio < min_ratio:
        return None
    return stocks.get(best_match)

# User input for number of days to fetch data
n_days = int(input("Enter the number of days of data you want to extract: "))

# User input for EMA values
ema_1st_value = int(input("Enter the value for 1st EMA: "))
ema_2nd_value = int(input("Enter the value for 2nd EMA: "))

# Column labels for the two EMA series, built once instead of at every use.
ema_1st_col = f'{ema_1st_value} EMA'
ema_2nd_col = f'{ema_2nd_value} EMA'

# Company name -> symbol lookup. It does not change inside the loop, so it is
# built once rather than once per row.
symbol_lookup = dict(zip(df['Companies'], df['Symbols']))

# Initialize an empty list to store matched companies
matched_companies = []

# Iterate over rows of df1 and chart each distinct entity once
for index, row in df1.iterrows():
    entity_name = row['entity']

    # Rows without a usable entity name cannot be matched to a symbol.
    if not isinstance(entity_name, str) or not entity_name.strip():
        continue

    # Check for repeats before matching, which scans every company name.
    if entity_name in matched_companies:
        continue

    # Use fuzzy matching to find the corresponding symbol
    matched_symbol = fuzzy_matching(entity_name, symbol_lookup)
    if matched_symbol is None:
        continue

    # Add the entity to the list of matched companies
    matched_companies.append(entity_name)

    # Fetch data for the symbol
    df_symbol = fetch_data_for_symbol(matched_symbol, interval='1d', n_days=n_days)

    if df_symbol is None or df_symbol.empty:
        print(f"Skipping {entity_name}: no price data returned for {matched_symbol}")
        continue

    # A crossover needs more bars than the longer EMA window.
    if len(df_symbol) <= max(ema_1st_value, ema_2nd_value):
        print(f"Skipping {entity_name}: only {len(df_symbol)} rows of data for {matched_symbol}")
        continue

    # Calculate EMAs
    df_symbol[ema_1st_col] = calculate_ema(df_symbol['Close'], ema_1st_value)
    df_symbol[ema_2nd_col] = calculate_ema(df_symbol['Close'], ema_2nd_value)

    # Generate Buy and Sell signals
    df_symbol['Buy Signal'] = generate_buy_signals(df_symbol[ema_1st_col], df_symbol[ema_2nd_col])
    df_symbol['Sell Signal'] = generate_sell_signals(df_symbol[ema_1st_col], df_symbol[ema_2nd_col])

    is_buy = df_symbol['Buy Signal'] == 1
    is_sell = df_symbol['Sell Signal'] == 1

    # Create traces
    fig = go.Figure()

    # Candlestick trace
    fig.add_trace(go.Candlestick(x=df_symbol.index,
                                 open=df_symbol['Open'],
                                 high=df_symbol['High'],
                                 low=df_symbol['Low'],
                                 close=df_symbol['Close'],
                                 name='Candlestick'))

    # EMA traces
    fig.add_trace(go.Scatter(x=df_symbol.index, y=df_symbol[ema_1st_col], mode='lines', name=ema_1st_col))
    fig.add_trace(go.Scatter(x=df_symbol.index, y=df_symbol[ema_2nd_col], mode='lines', name=ema_2nd_col))

    # Buy and Sell signals
    fig.add_trace(go.Scatter(x=df_symbol.index[is_buy], y=df_symbol['Close'][is_buy],
                             mode='markers', marker=dict(color='blue', symbol='triangle-up', size=10),
                             name='Buy Signal'))
    # The colour was 'black1', which is not a valid colour name; 'black'
    # matches the sell marker used by create_figure() in the Gradio cell.
    fig.add_trace(go.Scatter(x=df_symbol.index[is_sell], y=df_symbol['Close'][is_sell],
                             mode='markers', marker=dict(color='black', symbol='triangle-down', size=10),
                             name='Sell Signal'))

    # Update layout
    fig.update_layout(title=f'EMA Trading Strategy for {entity_name}',
                      xaxis_title='Date',
                      yaxis_title='Price',
                      hovermode='x unified',
                      showlegend=True)

    # Show plot
    fig.show()


Enter the number of days of data you want to extract: 1000
Enter the value for 1st EMA: 50
Enter the value for 2nd EMA: 100


[*********************100%%**********************]  1 of 1 completed

Fetching data for: MARUTI.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: TCS.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: ITC.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: ASIANPAINT.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: TECHM.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: BHARTIARTL.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: INFY.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: BHARATFORG.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: RELIANCE.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: HDFCBANK.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: ULTRACEMCO.NS, timeframe=1d, last 1000 days





[*********************100%%**********************]  1 of 1 completed

Fetching data for: INDUSINDBK.NS, timeframe=1d, last 1000 days





GRADIO INTERFACE WITH CANDLESTICKS

In [57]:


# Assuming df1 and df are predefined DataFrames with company information

def fetch_data_for_symbol(symbol, interval, n_days):
    """Download the last ``n_days`` days of prices and report failures as text.

    Args:
        symbol: Ticker passed straight to ``yf.download``.
        interval: Bar interval passed straight to ``yf.download``.
        n_days: Number of days to look back from today.

    Returns:
        ``(data, None)`` on success, or ``(None, message)`` when the download
        comes back empty or any exception is raised along the way.
    """
    day_format = "%Y-%m-%d"
    try:
        range_end = pd.Timestamp.today().strftime(day_format)
        range_start = (pd.Timestamp.today() - pd.DateOffset(days=n_days)).strftime(day_format)
        frame = yf.download(symbol, start=range_start, end=range_end, interval=interval)
        # The emptiness check stays inside the try so a failure there is reported too.
        return (None, "Failed to fetch data. The data might be empty.") if frame.empty else (frame, None)
    except Exception as exc:
        return None, str(exc)

def calculate_ema(prices, window):
    """Return the exponential moving average of ``prices`` as a NumPy array.

    Uses the recursive form ``ema[t] = alpha * price[t] + (1 - alpha) * ema[t-1]``
    with ``alpha = 2 / (window + 1)``, seeded with the first price.

    This replaces a convolution with a truncated exponential kernel that had
    three problems:

    * ``np.convolve`` flips the kernel, so the OLDEST price in each window
      received the largest weight;
    * the first ``window`` values were overwritten with ``ema[window]``, a
      value computed from later prices;
    * ``ema[window]`` raised ``IndexError`` unless there were more than
      ``window`` prices.

    Args:
        prices: 1-D sequence of prices (list, array or Series).
        window: EMA span; must be at least 1.

    Returns:
        ``np.ndarray`` of floats with the same length as ``prices``.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")
    # Go through a plain float array so the result is positional, not index-aligned.
    series = pd.Series(np.asarray(prices, dtype=float))
    return series.ewm(span=window, adjust=False).mean().to_numpy()

def generate_buy_sell_signals(df_symbol, ema_1st_value, ema_2nd_value):
    """Add ``'Buy Signal'`` and ``'Sell Signal'`` columns from the two EMA columns.

    A buy is flagged where the first EMA goes from at-or-below the second to
    above it; a sell where it goes from at-or-above to below.

    Args:
        df_symbol: Frame containing ``'<ema_1st_value> EMA'`` and
            ``'<ema_2nd_value> EMA'`` columns. It is modified in place.
        ema_1st_value: Value used in the first EMA column's label.
        ema_2nd_value: Value used in the second EMA column's label.

    Returns:
        The same ``df_symbol`` object.
    """
    first = df_symbol[f'{ema_1st_value} EMA']
    second = df_symbol[f'{ema_2nd_value} EMA']
    first_prev = first.shift(1)
    second_prev = second.shift(1)

    crossed_up = (first_prev <= second_prev) & (first > second)
    df_symbol['Buy Signal'] = np.where(crossed_up, 1, 0)

    crossed_down = (first_prev >= second_prev) & (first < second)
    df_symbol['Sell Signal'] = np.where(crossed_down, 1, 0)

    return df_symbol

def create_figure(df_symbol, ema_1st_value, ema_2nd_value, entity_name):
    """Build the candlestick chart with both EMA lines and the signal markers.

    Args:
        df_symbol: Frame with ``Open``, ``High``, ``Low``, ``Close``, the two
            ``'<value> EMA'`` columns, ``'Buy Signal'`` and ``'Sell Signal'``.
        ema_1st_value: Value used in the first EMA column's label.
        ema_2nd_value: Value used in the second EMA column's label.
        entity_name: Text appended to the chart title.

    Returns:
        The assembled ``go.Figure``.
    """
    dates = df_symbol.index
    closes = df_symbol['Close']

    chart = go.Figure()

    # Use Candlestick chart instead of line chart
    chart.add_trace(go.Candlestick(
        x=dates,
        open=df_symbol['Open'],
        high=df_symbol['High'],
        low=df_symbol['Low'],
        close=closes,
        name='Candlestick',
    ))

    # One line per EMA column, named after the column.
    for ema_label in (f'{ema_1st_value} EMA', f'{ema_2nd_value} EMA'):
        chart.add_trace(go.Scatter(x=dates, y=df_symbol[ema_label], mode='lines', name=ema_label))

    # Markers sit on the close price of the rows where a signal fired.
    marker_specs = (
        ('Buy Signal', 'blue', 'triangle-up'),
        ('Sell Signal', 'black', 'triangle-down'),
    )
    for signal_column, colour, glyph in marker_specs:
        fired = df_symbol[signal_column] == 1
        chart.add_trace(go.Scatter(
            x=dates[fired],
            y=closes[fired],
            mode='markers',
            marker=dict(color=colour, symbol=glyph, size=10),
            name=signal_column,
        ))

    chart.update_layout(
        title=f'Candlestick and EMA Trading Strategy for {entity_name}',
        xaxis_title='Date',
        yaxis_title='Price',
        hovermode='x unified',
        showlegend=True,
    )

    return chart

def main(entity_name, n_days, ema_1st_value, ema_2nd_value):
    """Gradio callback: fetch prices, compute both EMAs and return the chart.

    ``entity_name`` is passed to the download unchanged, so it has to be a
    symbol the data source understands; no name-to-symbol matching is done
    here yet.

    Changes from the earlier version:

    * the numeric inputs are coerced to ``int`` and validated, because the
      number widgets may deliver floats or ``None`` (blank field), which broke
      the EMA computation with an uncaught ``TypeError`` and produced labels
      such as ``'50.0 EMA'``;
    * a blank symbol is rejected up front;
    * ``TypeError``, ``IndexError`` and ``KeyError`` raised while computing or
      plotting are shown as an error figure, like ``ValueError`` already was.

    Args:
        entity_name: Symbol to download.
        n_days: Number of days of history to request.
        ema_1st_value: Span of the first EMA.
        ema_2nd_value: Span of the second EMA.

    Returns:
        A ``go.Figure``: the strategy chart on success, otherwise a figure
        that carries the error message.
    """
    if not isinstance(entity_name, str) or not entity_name.strip():
        return create_error_figure("Error: Please enter a symbol.")
    entity_name = entity_name.strip()

    try:
        n_days = int(n_days)
        ema_1st_value = int(ema_1st_value)
        ema_2nd_value = int(ema_2nd_value)
    except (TypeError, ValueError, OverflowError):
        return create_error_figure("Error: Number of days and both EMA values must be whole numbers.")
    if min(n_days, ema_1st_value, ema_2nd_value) < 1:
        return create_error_figure("Error: Number of days and both EMA values must be at least 1.")

    matched_symbol = entity_name  # Implement fuzzy matching as needed.
    df_symbol, error = fetch_data_for_symbol(matched_symbol, interval='1d', n_days=n_days)
    if error:
        return create_error_figure(f"Error: {error}")

    if df_symbol is None or df_symbol.empty:
        return create_error_figure("Error: No data available for the given symbol.")

    try:
        df_symbol['Close'] = pd.to_numeric(df_symbol['Close'], errors='coerce')  # Ensure 'Close' is numeric for calculation
        df_symbol[f'{ema_1st_value} EMA'] = calculate_ema(df_symbol['Close'], ema_1st_value)
        df_symbol[f'{ema_2nd_value} EMA'] = calculate_ema(df_symbol['Close'], ema_2nd_value)
        df_symbol = generate_buy_sell_signals(df_symbol, ema_1st_value, ema_2nd_value)
        return create_figure(df_symbol, ema_1st_value, ema_2nd_value, entity_name)
    except (ValueError, TypeError, IndexError, KeyError) as e:
        return create_error_figure(f"Error: {str(e)}")

def create_error_figure(error_message):
    """Return an otherwise empty figure that shows ``error_message`` in red.

    Args:
        error_message: Text to display.

    Returns:
        A ``go.Figure`` with both axes hidden and one annotation.
    """
    placeholder = go.Figure()
    placeholder.add_annotation(
        text=error_message,
        xref="paper",
        yref="paper",
        showarrow=False,
        font={"size": 20, "color": "red"},
    )
    placeholder.update_layout(xaxis_visible=False, yaxis_visible=False)
    return placeholder

# Widgets are listed in the same order as main()'s parameters.
iface = gr.Interface(
    fn=main,
    inputs=[gr.Textbox(label="Entity Name")] + [
        gr.Number(label=number_label)
        for number_label in ("Number of Days", "1st EMA Value", "2nd EMA Value")
    ],
    outputs=gr.Plot(label="Candlestick and EMA Strategy Plot"),
    description="Enter the company's name, number of days for data, and two EMA values to plot the EMA trading strategy.",
)
iface.launch(debug=True)


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://3ca36384055bc2f038.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


[*********************100%%**********************]  1 of 1 completed


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7861 <> https://3ca36384055bc2f038.gradio.live


