In [None]:
# Colab only: mount Google Drive at /content/drive; the notebook later
# changes into a project folder below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 13.9 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 63.3 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 65.4 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score

import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from keras.models import Model
from tensorflow.keras import utils as np_utils
from keras.utils.np_utils import to_categorical

import transformers
from transformers import AutoTokenizer,TFDistilBertModel, DistilBertConfig
from transformers import TFAutoModel

import warnings
warnings.filterwarnings("ignore")

In [None]:
print(tf.__version__)
print(keras.__version__)
     

2.9.2
2.9.0


In [None]:
# Make the project folder on the mounted Drive the working directory, so the
# relative paths used in later cells ('data/...', 'model.h5', ...) resolve there.
# NOTE(review): the path is hard-coded to one user's Drive layout.
import os 
os.chdir('/content/drive/MyDrive/FSDS/Live Class Materials/Deep Learning/NLP/BBC News Sample Solution')

In [None]:
df = pd.read_csv('data/BBC News Train.csv', header=0,index_col=0)
df.head()

Unnamed: 0_level_0,Text,Category
ArticleId,Unnamed: 1_level_1,Unnamed: 2_level_1
1833,worldcom ex-boss launches defence lawyers defe...,business
154,german business confidence slides german busin...,business
1101,bbc poll indicates economic gloom citizens in ...,business
1976,lifestyle governs mobile choice faster bett...,tech
917,enron bosses in $168m payout eighteen former e...,business


In [None]:
df.shape

(1490, 2)

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1490 entries, 1833 to 538
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Text      1490 non-null   object
 1   Category  1490 non-null   object
dtypes: object(2)
memory usage: 34.9+ KB


In [None]:
#types of categories
df['Category'].value_counts()

sport            346
business         336
politics         274
entertainment    273
tech             261
Name: Category, dtype: int64

In [None]:
list(df['Category'].unique())

['business', 'tech', 'politics', 'sport', 'entertainment']

In [None]:
# Inputs are the raw article texts; targets are the category names.
X, y = df['Text'], df['Category']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Learn the category -> integer mapping from the training labels only, then
# apply that same mapping to the held-out labels.
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

# One-hot encode both label vectors with a shared width.
num_classes = np.max(y_train) + 1
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [None]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1192,) (1192, 5)
(298,) (298, 5)


In [None]:
y_train

array([[0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0.],
       ...,
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.]], dtype=float32)

In [None]:
# Creating tokenizer
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
bert = TFAutoModel.from_pretrained('distilbert-base-uncased')

Downloading:   0%|          | 0.00/363M [00:00<?, ?B/s]

Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertModel: ['activation_13', 'vocab_projector', 'vocab_layer_norm', 'vocab_transform']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFDistilBertModel were initialized from the model checkpoint at distilbert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [None]:
# Mark every layer of the loaded model as trainable.
for bert_layer in bert.layers:
    bert_layer.trainable = True

In [None]:
# Tokenization of the data
def text_encode(text, tokenizer, max_len=100):
    """Tokenize each text and stack the results into two 2-D arrays.

    Args:
        text: Iterable of strings (for example a pandas Series). Every element
            is passed to ``tokenizer`` on its own.
        tokenizer: Callable invoked as ``tokenizer(x, return_tensors='tf',
            truncation=True, padding='max_length', max_length=max_len,
            add_special_tokens=True)``. It must return a mapping with
            ``'input_ids'`` and ``'attention_mask'`` entries that can be
            converted to NumPy arrays holding one padded sequence each.
        max_len: Length every sequence is padded/truncated to.

    Returns:
        ``[input_ids, attention_mask]``: two NumPy arrays with one row per
        input text. The leading batch axis is always kept, so a single text
        yields shape ``(1, max_len)`` and empty input yields ``(0, max_len)``.
    """
    encodings = [
        tokenizer(
            x,
            return_tensors='tf',
            truncation=True,
            padding='max_length',
            max_length=max_len,
            add_special_tokens=True,
        )
        for x in text
    ]

    if not encodings:
        empty = np.empty((0, max_len), dtype=np.int32)
        return [empty, empty.copy()]

    # Stack along axis 0 instead of calling np.squeeze without an axis: an
    # axis-less squeeze also removes the batch axis when only one text is
    # encoded, handing the model a 1-D array instead of a (1, max_len) batch.
    input_ids = np.concatenate(
        [np.asarray(enc['input_ids']).reshape(1, -1) for enc in encodings], axis=0
    )
    attention_mask = np.concatenate(
        [np.asarray(enc['attention_mask']).reshape(1, -1) for enc in encodings], axis=0
    )

    return [input_ids, attention_mask]

In [None]:
X_train_input_ids, X_train_attention_mask = text_encode(X_train, tokenizer, max_len=100)
X_test_input_ids, X_test_attention_mask = text_encode(X_test, tokenizer, max_len=100)

In [None]:
# model creation
def build_model(bert_model, maxlen=100, num_classes=5, learning_rate=1e-5):
    """Put a small dense classification head on top of ``bert_model`` and compile it.

    Args:
        bert_model: Model called as ``bert_model(input_ids, attention_mask=...)``.
            Element 0 of its output is indexed as ``[:, 0, :]``, i.e. it is
            treated as a 3-D tensor of per-token states.
        maxlen: Length of both integer input sequences; it has to equal the
            length the texts were padded/truncated to.
        num_classes: Number of units in the softmax output layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled Keras model that takes ``[input_ids, attention_mask]`` and
        outputs ``num_classes`` softmax probabilities. It is compiled with
        categorical cross-entropy loss and the accuracy metric.
    """
    input_ids = tf.keras.Input(shape=(maxlen,), dtype=tf.int32, name='input_ids')
    attention_mask = tf.keras.Input(shape=(maxlen,), dtype=tf.int32, name='attention_mask')

    encoder_outputs = bert_model(input_ids, attention_mask=attention_mask)
    # Keep only position 0 of every sequence as the document representation
    # (presumably the [CLS] token, since special tokens are added on encoding).
    first_token_state = encoder_outputs[0][:, 0, :]

    hidden = tf.keras.layers.Dense(32, activation='relu')(first_token_state)
    hidden = tf.keras.layers.Dropout(0.2)(hidden)
    probabilities = tf.keras.layers.Dense(num_classes, activation='softmax')(hidden)

    model = tf.keras.models.Model(inputs=[input_ids, attention_mask], outputs=[probabilities])
    # `lr` is a deprecated alias of `learning_rate` and is rejected by newer
    # Keras optimizers, so the supported keyword is used.
    model.compile(
        Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [None]:
model = build_model(bert, maxlen=100)

In [None]:
# NOTE(review): the training call below is wrapped in a string literal, so
# running this cell does nothing: `model` is not fitted and `history` is not
# defined. Later cells that read `history` only work if this call was actually
# executed earlier in the same kernel session.
"""
history = model.fit(
    [X_train_input_ids, X_train_attention_mask],
    y_train,
    batch_size=32,
    validation_data=([X_test_input_ids, X_test_attention_mask], y_test),
    epochs=100
)
"""

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Persist the per-epoch metrics dict (`history.history`) with pickle.
# NOTE(review): `history` only exists if model.fit(...) was really run in this
# session; the training cell currently holds that call inside a string literal.
# NOTE(review): '/trainHistoryDict' is an absolute path at the filesystem root,
# not a file in the working directory — confirm that this is intended.
import pickle
with open('/trainHistoryDict', 'wb') as file_pi:
    pickle.dump(history.history, file_pi)

In [None]:
"""
with open('/trainHistoryDict', "rb") as file_pi:
    history = pickle.load(file_pi)
"""

In [None]:
!ls

'BBC News Classification project.ipynb'   model.h5	    submission1.csv
'BBC News Project'			  my_history.npy    trainHistoryDict
'BBC News Sample Solution.csv'		  Solfile2.csv
 data					  Solfile2.gsheet


In [None]:
# Write the current model to 'model.h5' in the working directory.
# NOTE(review): if the training call was not executed in this session, this
# saves the model in whatever state build_model left it — presumably replacing
# an existing model.h5; confirm before re-running.
model.save('model.h5')

In [None]:
#model = tf.keras.models.load_model('model.h5', custom_objects={'TFDistilBertModel': TFDistilBertModel})

In [None]:
def plot_graphs(history, string):
  """Plot a training metric and its validation counterpart per epoch.

  Args:
    history: Either an object exposing a ``history`` mapping (as returned by
      ``model.fit``) or that mapping itself, e.g. the dict written to and
      reloaded from the pickle file in this notebook.
    string: Metric name, for example "accuracy" or "loss". The validation
      series is looked up under ``'val_' + string``.

  Raises:
    KeyError: If the metric or its ``val_`` counterpart is not recorded.
  """
  # The pickle file stores `history.history` (a plain dict), so a reloaded
  # history has no `.history` attribute; accept both forms.
  metrics = getattr(history, 'history', history)
  plt.plot(metrics[string])
  plt.plot(metrics['val_'+string])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, 'val_'+string])
  plt.show()

In [None]:
plot_graphs(history, "accuracy")
plot_graphs(history, "loss")

AttributeError: ignored

In [None]:
loss, accuracy = model.evaluate([X_test_input_ids, X_test_attention_mask], y_test)
print('Test accuracy :', accuracy)

Test accuracy : 0.9731543660163879


In [None]:
df_test = pd.read_csv('data/BBC News Test.csv', header=0,index_col=0)
df_test.head()

Unnamed: 0_level_0,Text
ArticleId,Unnamed: 1_level_1
1018,qpr keeper day heads for preston queens park r...
1319,software watching while you work software that...
1138,d arcy injury adds to ireland woe gordon d arc...
459,india s reliance family feud heats up the ongo...
1020,boro suffer morrison injury blow middlesbrough...


In [None]:
X_df_test = df_test['Text']
     
X_df_test_input_ids, X_df_test_attention_mask = text_encode(X_df_test, tokenizer, max_len=100)

X_df_test_predict = model.predict([X_df_test_input_ids, X_df_test_attention_mask])




In [None]:
X_df_test

ArticleId
1018    qpr keeper day heads for preston queens park r...
1319    software watching while you work software that...
1138    d arcy injury adds to ireland woe gordon d arc...
459     india s reliance family feud heats up the ongo...
1020    boro suffer morrison injury blow middlesbrough...
                              ...                        
1923    eu to probe alitalia  state aid  the european ...
373     u2 to play at grammy awards show irish rock ba...
1704    sport betting rules in spotlight a group of mp...
206     alfa romeos  to get gm engines  fiat is to sto...
471     citizenship event for 18s touted citizenship c...
Name: Text, Length: 735, dtype: object

In [None]:
X_df_test_predict

array([[3.12700408e-08, 3.03398173e-09, 1.92551433e-08, 1.00000000e+00,
        5.59077340e-10],
       [6.91168793e-08, 4.21597912e-09, 8.33899563e-08, 8.17207102e-09,
        9.99999881e-01],
       [3.89121162e-08, 2.99400571e-09, 2.23214922e-08, 1.00000000e+00,
        5.37688172e-10],
       ...,
       [1.08766143e-08, 1.74771841e-08, 1.00000000e+00, 1.68793175e-08,
        1.68341285e-10],
       [9.99999285e-01, 1.48077515e-08, 8.56551452e-08, 2.59567202e-07,
        3.86650754e-07],
       [9.91971749e-09, 9.95873162e-09, 1.00000000e+00, 6.57815269e-09,
        7.02583824e-11]], dtype=float32)

In [None]:
list(df['Category'].unique())

['business', 'tech', 'politics', 'sport', 'entertainment']

In [None]:
# Turn the ArticleId index of the text Series into an ordinary column.
df1 = X_df_test.reset_index()

# One probability column per class.
# NOTE(review): this order has to match the integer encoding used for the
# training labels — confirm against encoder.classes_.
column_values = ['business', 'entertainment', 'politics', 'sport', 'tech']
df2 = pd.DataFrame(X_df_test_predict, columns=column_values)

# Place ids/texts and class probabilities side by side, row for row.
df_test_result = pd.concat([df1, df2], axis=1)
df_test_result.head()

Unnamed: 0,ArticleId,Text,business,entertainment,politics,sport,tech
0,1018,qpr keeper day heads for preston queens park r...,3.127004e-08,3.033982e-09,1.925514e-08,1.0,5.590773e-10
1,1319,software watching while you work software that...,6.911688e-08,4.215979e-09,8.338996e-08,8.172071e-09,0.9999999
2,1138,d arcy injury adds to ireland woe gordon d arc...,3.891212e-08,2.994006e-09,2.232149e-08,1.0,5.376882e-10
3,459,india s reliance family feud heats up the ongo...,0.9999992,5.054137e-09,1.723348e-07,4.947546e-07,1.043624e-07
4,1020,boro suffer morrison injury blow middlesbrough...,2.958454e-08,3.547084e-09,1.718602e-08,1.0,5.975142e-10


In [None]:
# For every article (indexed by its text) pick the class column with the
# highest probability and store that column name as 'Category'.
class_probabilities = df_test_result.drop(columns='ArticleId').set_index('Text')
df3 = class_probabilities.idxmax(axis='columns').to_frame('Category')

In [None]:
df3.head()

Unnamed: 0_level_0,Category
Text,Unnamed: 1_level_1
qpr keeper day heads for preston queens park rangers keeper chris day is set to join preston on a month s loan. day has been displaced by the arrival of simon royce who is in his second month on loan from charlton. qpr have also signed italian generoso rossi. r s manager ian holloway said: some might say it s a risk as he can t be recalled during that month and simon royce can now be recalled by charlton. but i have other irons in the fire. i have had a yes from a couple of others should i need them. day s rangers contract expires in the summer. meanwhile holloway is hoping to complete the signing of middlesbrough defender andy davies - either permanently or again on loan - before saturday s match at ipswich. davies impressed during a recent loan spell at loftus road. holloway is also chasing bristol city midfielder tom doherty.,sport
software watching while you work software that can not only monitor every keystroke and action performed at a pc but also be used as legally binding evidence of wrong-doing has been unveiled. worries about cyber-crime and sabotage have prompted many employers to consider monitoring employees. the developers behind the system claim it is a break-through in the way data is monitored and stored. but privacy advocates are concerned by the invasive nature of such software. the system is a joint venture between security firm 3ami and storage specialists bridgehead software. they have joined forces to create a system which can monitor computer activity store it and retrieve disputed files within minutes. more and more firms are finding themselves in deep water as a result of data misuse. sabotage and data theft are most commonly committed from within an organisation according to the national hi-tech crime unit (nhtcu) a survey conducted on its behalf by nop found evidence that more than 80% of medium and large companies have been victims of some form of cyber-crime. bridgehead software has come up with techniques to prove to a legal standard that any stored file on a pc has not been tampered with. ironically the impetus for developing the system came as a result of the freedom of information act which requires companies to store all data for a certain amount of time. the storage system has been incorporated into an application developed by security firm 3ami which allows every action on a computer to be logged. potentially it could help employers to follow the trail of stolen files and pinpoint whether they had been emailed to a third party copied printed deleted or saved to cd floppy disk memory stick or flash card. other activities the system can monitor include the downloading of pornography the use of racist or bullying language or the copying of applications for personal use. 
increasingly organisations that handle sensitive data such as governments are using biometric log-ins such as fingerprinting to provide conclusive proof of who was using a particular machine at any given time. privacy advocates are concerned that monitoring at work is not only damaging to employee s privacy but also to the relationship between employers and their staff. that is not the case said tim ellsmore managing director of 3ami. it is not about replacing dialogue but there are issues that you can talk through but you still need proof he said. people need to recognise that you are using a pc as a representative of a company and that employers have a legal requirement to store data he added.,tech
d arcy injury adds to ireland woe gordon d arcy has been ruled out of the ireland team for saturday s six nations clash against scotland in murrayfield. like skipper brian o driscoll d arcy failed to recover from a hamstring injury. the side will now be led by munster lock paul o connell. shane horgan switches from wing to centre where he will be joined by ulster s kevin maggs. girvan dempsey comes into the team to take the right wing spot while gavin duffy is called up to the replacements. we gave gordon a chance but it didn t work out said ireland coach eddie o sullivan. in terms of the risk element it was a sensible precaution. he should be fine for the next game but we do not want to tempt fate. maggs who will win his 67th cap was the obvious replacement at centre while shane horgan was always likely to be moved from the wing. the only other change to the ireland side from last weekend s win in rome sees wasps flanker johnny o connor replacing denis leamy. o connor will be winning his third cap after making his debut in the victory over south africa last november. : murphy dempsey horgan maggs hickie o gara stringer corrigan byrne hayes o kelly o connell capt s easterby o connor foley. : sheahan horan o callaghan miller g easterby humphreys duffy.,sport
india s reliance family feud heats up the ongoing public spat between the two heirs of india s biggest conglomerate reliance group has spilled over to the board meeting of a leading company within the group. anil ambani vice-chairman of india petrochemicals limited (ipcl) stayed away from a gathering of senior managers on thursday. the move follows a decision earlier this month by anil - the younger brother of reliance group president mukesh ambani - to resign from his post. his resignation was not accepted by his brother who is also the boss of ipcl. the ipcl board met in mumbai to discuss the company s results for the october-to-december quarter. it is understood that the board also considered anil s resignation and asked him to reconsider his decision. however anil s demand that anand jain - another ipcl board member accused by anil of creating a rift in the ambani family - be thrown out was not met. anil has accused anand jain a confidant of his brother mukesh of playing a negative role in the ambani family and being responsible for the trouble between the brothers. on wednesday the board of reliance energy another reliance group company reaffirmed its faith in anil who is the company s chief. reliance group acquired the government s 26% stake in ipcl - india s second-largest petrochemicals company - in 2002 as part of the privatisation drive. meanwhile the group s flagship company reliance industries has its board meeting on friday to consider its financial results. mukesh is the company s chairman and anil its deputy and it is expected that both brothers will come face to face in the meeting. the ambani family controls 48% of the group which is worth $17bn (£9.1bn; 745bn indian rupees). it was founded by their father dhiru bhai ambani who died two years ago.,business
boro suffer morrison injury blow middlesbrough midfielder james morrison has been ruled out for up to eight weeks after an operation on tuesday. the 18-year-old who has played in 13 of the club s last 14 games had surgery to repair a double hernia. a club spokesman confirmed: it is a bilateral sportsman s hernia which was operated on yesterday. morrison was sent for scans after being substituted at half-time during boro s 2-1 uefa cup win over graz ak in which he scored the equaliser. his injury is the latest blow for the teessiders who have been without gaizka mendieta george boateng and mark viduka for extended periods. meanwhile the kick-off time for boro s uefa cup match at sporting lisbon on 17 march has been brought forward from 2115 gmt to 1945 gmt.,sport


In [None]:
df_test_final_result = pd.concat([pd.DataFrame(df_test_result['ArticleId']), df3.reset_index()], axis=1)

In [None]:
df_test_final_result.head()

Unnamed: 0,ArticleId,Text,Category
0,1018,qpr keeper day heads for preston queens park r...,sport
1,1319,software watching while you work software that...,tech
2,1138,d arcy injury adds to ireland woe gordon d arc...,sport
3,459,india s reliance family feud heats up the ongo...,business
4,1020,boro suffer morrison injury blow middlesbrough...,sport


In [None]:
df_test_final_result.drop('Text', axis=1).to_csv('submission1.csv', header=True, index=False)

In [None]:
df_test_final_result.to_csv('Solfile2.csv', header=True, index=False)

In [None]:
text = "In their 3-0 World Cup semifinal win over Croatia, eight of Argentina’s starting XI were the same as the 1-2 defeat to Saudi Arabia in their tournament opener. Yet, on the pitch, the team looked entirely different."

In [None]:
text_df = pd.DataFrame(data=[text], columns=['Text'])

In [None]:
text_df.head()

Unnamed: 0,Text
0,In their 3-0 World Cup semifinal win over Croa...


In [None]:
X_df_test = text_df['Text']
     
X_df_test_input_ids, X_df_test_attention_mask = text_encode(X_df_test, tokenizer, max_len=100)

X_df_test_predict = model.predict([X_df_test_input_ids, X_df_test_attention_mask])



In [None]:
X_df_test_attention_mask

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [None]:
df1 = pd.DataFrame(data = X_df_test)
df1.reset_index(inplace = True)
column_values = ['business', 'entertainment', 'politics', 'sport', 'tech']

df2 = pd.DataFrame(data = X_df_test_predict, columns = column_values)

In [None]:
df1.head()

Unnamed: 0,index,Text
0,0,In their 3-0 World Cup semifinal win over Croa...


In [None]:
df2.head()

Unnamed: 0,business,entertainment,politics,sport,tech
0,2.682884e-07,6.52188e-09,1.234877e-07,0.999999,2.040717e-07
1,0.0001664625,2.544254e-06,7.887178e-06,0.999816,7.563095e-06
2,0.457665,0.1518388,0.0906077,0.023098,0.2767905
3,0.3874621,0.1088221,0.1285088,0.024346,0.350861


In [None]:
df1 = pd.DataFrame(data = X_df_test)
df1.reset_index(inplace = True)



df_test_result = pd.concat([df1, df2], axis=1)
df_test_result.head()

0.9999994

In [None]:
# Keep the one-row DataFrame rather than extracting the 'Category' Series:
# the next cells index df3["Category"] and display df3 as a table, which
# fails / differs if df3 is already a Series.
df3 = pd.DataFrame(df_test_result.set_index('Text').idxmax(axis = 'columns'), columns = ['Category']).iloc[:1]

In [None]:
k = df3["Category"][0]

In [None]:
df3

Unnamed: 0_level_0,Category
Text,Unnamed: 1_level_1
"In their 3-0 World Cup semifinal win over Croatia, eight of Argentina’s starting XI were the same as the 1-2 defeat to Saudi Arabia in their tournament opener. Yet, on the pitch, the team looked entirely different.",sport


In [None]:
pd.DataFrame(df_test_result.set_index('Text').idxmax(axis = 'columns'), columns = ['Category']).iloc[:1]['Category'][0]

'sport'

In [None]:
def category_predict(text):
  """Predict the news category of a single piece of text.

  Uses the notebook-level ``tokenizer``, ``model`` and ``text_encode``.

  Args:
    text: The article text to classify (one string).

  Returns:
    The name of the class with the highest predicted probability, one of
    'business', 'entertainment', 'politics', 'sport', 'tech'.
  """
  # NOTE(review): this order has to match the integer encoding of the training
  # labels — confirm against encoder.classes_.
  column_values = ['business', 'entertainment', 'politics', 'sport', 'tech']

  input_ids, attention_mask = text_encode(pd.Series([text], name='Text'), tokenizer, max_len=100)

  # Guarantee a (1, max_len) batch: if the encoder output has lost its batch
  # axis, the model would otherwise see the tokens as separate samples.
  input_ids = np.reshape(input_ids, (1, -1))
  attention_mask = np.reshape(attention_mask, (1, -1))

  probabilities = model.predict([input_ids, attention_mask])

  # Exactly one row of class probabilities; take its arg-max directly instead
  # of going through a DataFrame whose helper 'index' column also took part
  # in the idxmax comparison.
  return column_values[int(np.argmax(probabilities[0]))]

In [None]:
text1 = "Veteran actor Amitabh Bachchan took to his personal blog and posted a couple of pictures as he wrapped up the shoot of Kaun Banega Crorepati season 14. Amitabh, who hoped to return the next season, recalled a moment from his film Deewar. He wrote, “The last day of the show and the greetings from them that work so hard to make KBC what it is .a farewell or a bye to be back hopefully next year again .. and the return gift from me .. a moment of DEEWAR .. and the emotions.” The first season of KBC aired on July 3, 2000 and it was Amitabh’s first time as a television host"

In [None]:
category_predict(text1)



'entertainment'

In [None]:
text2 = 'Realme 10s, the next smartphone in the Realme 10 series, was launched in China on Friday. The Chinese manufacturer had teased a December 16 launch for its budget handset earlier this month. The Realme 10s is powered by a MediaTek Dimensity 810 SoC. The device features a 6.6-inch FHD+ IPS LCD display panel, with a 90Hz refresh rate. The Realme 10s will run Android-12 based Realme UI 3.0 out-of-the-box. The smartphone is also available in Streamer Blue and Crystal Black colour variants. The latest entrant in the Realme 10 series comes after the company had already launched the Realme 10 Pro Plus 5G, Realme 10 Pro 5G, Realme 10 5G, and the Realme 10 4G in various markets. Only the Realme 10 Pro series is available in India at the moment.'

In [None]:
category_predict(text2)



'tech'

In [None]:
text3 = 'In the wake of a contentious legal battle within English cricket, after Azeem Rafiq’s claims of institutional racism during his time as a Yorkshire cricketer which included allegations against the likes of Michael Vaughan, England’s test captain Ben Stokes says the inclusion of 18-year-old wrist spinner Rehan Ahmed in their final test in Pakistan could send a positive message to young British-Asian cricketers. “I have always felt cricket is a very inclusive sport,” he told The Guardian. “Rehan, he could be an unbelievable example to set for younger kids who want to come up. They may have maybe heard about what’s happened in cricket recently, (but) he can be seen as ‘we’ve got this 18-year-old, hopefully a potential superstar, why can’t I be that?’” Stokes said that the possibility of calling up Ahmed is only on the basis of him earning his place in the team, and speaks to England’s strength in depth in having a solid wrist-spinner in their ranks for South Asian conditions. “I don’t think it would be a case of giving caps away. We picked Rehan in the squad not just because of his talent and to integrate him, but because it would be a good opportunity to play him if we thought it was necessary,” he said.'

In [None]:
category_predict(text3)



'sport'

In [None]:
text4 = '"With the inauguration of the office in the national capital, the BRS has started its journey as national political party under KCR\'s leadership and will expand its footprint across the country," BRS MP Ranjith Reddy said. Rao had founded the Telangana Rashtra Samithi in April 2001 with a single-point agenda of creating a separate Telangana state with Hyderabad as its capital. His party stormed to power in 2014 and Rao became the first chief minister of Telangana. Twenty-one years after its formation, the TRS has officially transformed into the Bharat Rashtra Samithi (BRS). Rao has been nursing national ambitions since 2018 to provide an alternative to both the BJP and the Congress. He has been meeting several regional leaders and chief ministers, including his Bihar counterpart and JD(U) leader Nitish Kumar, Tamil Nadu Chief Minister and DMK supremo MK Stalin to forge a united front of opposition parties.'

In [None]:
category_predict(text4)



'politics'