In [1]:
# imports
import os
import sys
import dvc.api
import pandas as pd
import dataframe_image as dfi
from dotenv import load_dotenv
import cohere
from cohere.classify import Example
import warnings
warnings.filterwarnings('ignore')

In [2]:
# make the current folder, its parent and the scripts folder importable
sys.path.extend(['.', '..'])
sys.path.insert(1, '../scripts/')
import defaults as defs
import dataCleaner as dc
import dataVisualizer as dv

# load your environment (python-dotenv)
load_dotenv()
# the co:here key is read from the environment; provide your own key there
cohere_api_key = os.getenv('cohere_api_key')

# the same descriptive string is handed to both helper objects
notebook_label = 'news scoring using co:here API notebook'
cleaner = dc.dataCleaner(notebook_label)
visualizer = dv.dataVisualizer(notebook_label)

logger <Logger dataCleaner (DEBUG)> created at path: ../logs/cleaner_root.log
Data cleaner in action
logger <Logger dataVisualizer (DEBUG)> created at path: ../logs/visualizer_root.log
Data visualizer in action


In [3]:
# pandas display settings
pd.set_option('display.max_columns', 30)

# version of the data to fetch
# v1 : local-store
version = 'v1'

# resolve the data url of the news file for that revision
news_url = dvc.api.get_url(defs.news_local_path, repo=defs.repo, rev=version)

# show the resolved news path
print(f'news data path: {news_url}')

news data path: /home/f0x-tr0t/Documents/dvc-store//50/1fbc56d932bcb51d74876281ec8f71


In [4]:
# reading csv files
# columns that pandas should parse as datetimes
DateCols = ['timestamp']
# extra strings to treat as missing values; each marker is listed once
missing_values = ["n/a", "na", "undefined", '?', 'NA']

news_data = pd.read_csv(
    news_url,
    na_values=missing_values,
    parse_dates=DateCols,
    low_memory=False,
)

# News scoring using co:here API

In [5]:
# setting up the co:here client
# the key is read from the environment into `cohere_api_key`; os.getenv gives
# None when the variable is not set, so fail early with a clear message
if not cohere_api_key:
    raise RuntimeError(
        "co:here API key not found: set 'cohere_api_key' in your environment or .env file"
    )
co = cohere.Client(cohere_api_key)

### Setting up model parameters

In [6]:
# list the columns available in the news data
news_data.columns

Index(['Domain', 'Title', 'Description', 'Body', 'Link', 'timestamp',
       'Analyst_Average_Score', 'Analyst_Rank', 'Reference_Final_Score'],
      dtype='object')

* As we have seen in the demo, it is best to classify the scores into categories before sending requests to the API.

In [7]:
# count how many rows carry each distinct analyst average score
news_data['Analyst_Average_Score'].value_counts()

0.00    7
1.33    1
1.66    1
0.33    1
Name: Analyst_Average_Score, dtype: int64

In [8]:
# count how many rows carry each distinct analyst rank
news_data['Analyst_Rank'].value_counts()

4    7
2    1
1    1
3    1
Name: Analyst_Rank, dtype: int64

* We can classify the average score into 5 categories.
    * 1- lovely       ---    0.33 or below
    * 2- good         ---    above 0.33, up to 0.66
    * 3- neutral      ---    above 0.66, up to 1.22
    * 4- risky        ---    above 1.22, up to 1.55
    * 5- detrimental  ---    greater than 1.55

In [9]:
def classifyBasedOnScore(df: pd.Series, score_col: str = 'Analyst_Average_Score') -> "str | None":
    """
    Classify a single news row based on its score.

    The score is mapped to a label using inclusive upper bounds:
    lovely (<= 0.33), good (<= 0.66), neutral (<= 1.22),
    risky (<= 1.55) and detrimental (> 1.55).

    Parameters
    =--------=
    df: pandas series (or any mapping)
        a single row of the data frame, as handed over by
        DataFrame.apply(..., axis=1); the parameter name is kept
        so that keyword callers (df=...) keep working
    score_col: string
        name of the field that holds the score to classify

    Returns
    =--------=
    the class label, or None when the score is missing (None / NaN)
    """
    score = df[score_col]
    # a missing score belongs to no class: return None explicitly instead of
    # falling through every comparison (NaN) or raising a TypeError (None)
    if pd.isna(score):
        return None

    # inclusive upper bound of each class, in ascending order
    upper_bounds = (
        (0.33, 'lovely'),
        (0.66, 'good'),
        (1.22, 'neutral'),
        (1.55, 'risky'),
    )
    for bound, label in upper_bounds:
        if score <= bound:
            return label
    # anything above the last bound
    return 'detrimental'

In [10]:
# label every row with its score class; with axis=1, apply hands each row to the classifier
news_data['class'] = news_data.apply(classifyBasedOnScore, axis=1)

In [11]:
# show each score next to the class assigned to it
news_data[['Analyst_Average_Score', 'class']]

Unnamed: 0,Analyst_Average_Score,class
0,0.0,lovely
1,0.0,lovely
2,0.0,lovely
3,0.0,lovely
4,0.0,lovely
5,1.33,risky
6,0.0,lovely
7,1.66,detrimental
8,0.33,lovely
9,0.0,lovely


* Now we can use this class feature for the co:here model

In [12]:
# setting up classify parameters (placeholder data for now, see the TODOs)

# model type to use
model_type = 'large'

# TODO: change these to the news body
# the news sentences (body) to get classification for
classification_inputs = [
    "Am I still able to return my order?",
    "When can I expect my package?",
]

# TODO: set up the news classifications like this
type_one = 'Shipping and handling policy'
type_two = 'Start return or exchange'
type_three = 'Track orders'

# TODO: change these to the news body and analyst average score
# TODO: set up the news examples like this
# example sentences grouped by their label; dict insertion order keeps the
# examples in the order type_one, type_two, type_three
examples_by_type = {
    type_one: [
        "Do you offer same day shipping?",
        "Can you ship to Italy?",
        "How long does shipping take?",
        "Can I buy online and pick up in store?",
        "What are your shipping options?",
    ],
    type_two: [
        "My order arrived damaged, can I get a refund?",
        "You sent me the wrong item",
        "I want to exchange my item for another colour",
        "I ordered something and it wasn’t what I expected. Can I return it?",
        "What’s your return policy?",
    ],
    type_three: [
        "Where’s my package?",
        "When will my order arrive?",
        "What’s my shipping number?",
        "Which carrier is my package with?",
        "Is my package delayed?",
    ],
}

# the existing examples prepared in the format the model is fed with
prompt_examples = [
    Example(text, label)
    for label, texts in examples_by_type.items()
    for text in texts
]

In [13]:
# request a classification of the inputs, using the prepared model type and examples
response = co.classify(model=model_type, inputs=classification_inputs, examples=prompt_examples)

In [14]:
# finally show the classifications returned for the inputs
# NOTE(review): converting the labels back to a numerical range factor is not done in this cell yet

print('The confidence levels of the labels are:\n{}'.format(response.classifications))

The confidence levels of the labels are:
[cohere.Classification {
	input: Am I still able to return my order?
	prediction: Start return or exchange
	confidence: [cohere.Confidence {
	label: Shipping and handling policy
	confidence: 0.32005534
}, cohere.Confidence {
	label: Start return or exchange
	confidence: 0.5335526
}, cohere.Confidence {
	label: Track orders
	confidence: 0.14639212
}]
}, cohere.Classification {
	input: When can I expect my package?
	prediction: Track orders
	confidence: [cohere.Confidence {
	label: Shipping and handling policy
	confidence: 0.27741268
}, cohere.Confidence {
	label: Start return or exchange
	confidence: 0.30839407
}, cohere.Confidence {
	label: Track orders
	confidence: 0.41419324
}]
}]


* Finally, for every input we receive the predicted label together with the confidence of each candidate label.