In [11]:
import torch
import pandas as pd
import re
import numpy as np
import pickle

from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

In [4]:
torch.cuda.is_available()

True

In [5]:
df = pd.read_csv("Data/preprocessed_courses.csv")

In [6]:
df.columns

Index(['Unnamed: 0', 'course_id', 'url', 'title', 'translated_title',
       'is_paid', 'instructors', 'translated_instructors', 'image_480x270',
       'published_title', 'headline', 'translated_headline', 'num_subscribers',
       'avg_rating', 'avg_rating_recent', 'rating', 'num_reviews',
       'num_published_lectures', 'num_published_practice_tests',
       'has_closed_caption', 'created_at', 'instructional_level',
       'published_at', 'objectives_summary', 'translated_objectives_summary',
       'is_recently_published', 'last_update_date', 'content_info', 'category',
       'char_count', 'description', 'preprocessed_description_stemmed',
       'preprocessed_description_unstemmed'],
      dtype='object')

In [18]:
class Preprocessor:
    def __init__(self):
        self.stop_words = set(stopwords.words('english'))
        self.stemmer = PorterStemmer()

    def remove_punctuation(self, text):
        punct_pattern = re.compile(r'[^\w\s]')
        return punct_pattern.sub(' ', text)

    def remove_stopwords(self, text):
        filtered_text = [word for word in text.split() if word.lower() not in self.stop_words]
        return ' '.join(filtered_text)

    def remove_extra_whitespaces(self, text):
        whitespace_pattern = re.compile(r'\s+')
        return whitespace_pattern.sub(' ', text)

    def remove_numbers(self, text):
        number_pattern = re.compile(r'\d+')
        return number_pattern.sub(' ', text)

    def stem_text_porter(self, text):
        stemmed_words = [self.stemmer.stem(word) for word in text.split()]
        return ' '.join(stemmed_words)

    def preprocessing_with_stemming(self, text):
        text = text.lower()
        text = self.remove_punctuation(text)
        text = self.remove_numbers(text)
        text = self.remove_stopwords(text)
        text = self.remove_extra_whitespaces(text)
        text = self.stem_text_porter(text)
        return text

    def preprocessing_without_stemming(self, text):
        text = text.lower()
        text = self.remove_punctuation(text)
        text = self.remove_numbers(text)
        text = self.remove_stopwords(text)
        text = self.remove_extra_whitespaces(text)
        return text


class SBERTModel:
    def __init__(self, data):
        self.data = data
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = SentenceTransformer('distilbert-base-nli-mean-tokens', device=self.device)
        self.preprocessor = Preprocessor()

    def fit_transform(self):
        self.course_embeddings = self.model.encode(self.data['preprocessed_description_stemmed'].tolist()) 

    def semantic_search_stemmed(self, query, n_tops=5, similarity_threshold=0.07):
        preprocessed_query = self.preprocessor.preprocessing_with_stemming(query)
        query_embedding = self.model.encode(preprocessed_query)
        cos_similarities = util.pytorch_cos_sim(query_embedding.unsqueeze(0), self.course_embeddings)
        relevant_indices = cos_similarities.squeeze() >= similarity_threshold
        filtered_data = self.data[relevant_indices]
        if not filtered_data.empty:
            top_results = filtered_data.iloc[cos_similarities[relevant_indices].argsort()[::-1][:n_tops]]
            return top_results
        else:
            return pd.DataFrame(columns=self.data.columns)

    def semantic_search_unstemmed(self, query, n_tops=5, similarity_threshold=0.07):
        preprocessed_query = self.preprocessor.preprocessing_without_stemming(query)
        query_embedding = self.model.encode(preprocessed_query)
        cos_similarities = util.pytorch_cos_sim(query_embedding.unsqueeze(0), self.course_embeddings)
        relevant_indices = cos_similarities.squeeze() >= similarity_threshold
        filtered_data = self.data[relevant_indices]
        if not filtered_data.empty:
            top_results = filtered_data.iloc[cos_similarities[relevant_indices].argsort()[::-1][:n_tops]]
            return top_results
        else:
            return pd.DataFrame(columns=self.data.columns)

    def semantic_search(self, query, top_n=5, similarity_threshold=0.07):
        preprocessed_query = self.preprocessor.preprocessing_with_stemming(query)
        query_embedding = self.model.encode(preprocessed_query)

        # Convert numpy arrays to PyTorch tensors
        query_tensor = torch.tensor(query_embedding, dtype=torch.float32)
        course_tensor = torch.tensor(self.course_embeddings, dtype=torch.float32)
        # Reshape query tensor (1,768)
        query_tensor = query_tensor.unsqueeze(0)
        
        # Compute cosine similarity using PyTorch
        cos_similarities = torch.nn.functional.cosine_similarity(course_tensor, query_tensor, dim=1)

        # Sort documents based on similarity
        top_indices = np.array(cos_similarities.argsort(descending=True))

        print(top_indices.shape)
        
        # Filter documents with similarity score > similarity_threshold
        # results = {"results":[]}
        # for index in top_indices:
        #     similarity_score = cos_similarities[index]
        #     filtered_item = {}
        #     if similarity_score > similarity_threshold:
        #         filtered_item['item_title'] = self.data.iloc[index]['title']
        #         filtered_item['item_translated_title'] = self.data.iloc[index]['translated_title']
        #         filtered_item['item_headline'] =  self.data.iloc[index]['headline']
        #         filtered_item['item_translated_headline'] =  self.data.iloc[index]['translated_headline']
        #         filtered_item['item_objectives'] = self.data.iloc[index]['objectives_summary']
        #         filtered_item['item_translated_objectives'] =  self.data.iloc[index]['translated_objectives_summary']
        #         filtered_item['item_score'] = similarity_score
        #         results["results"].append(filtered_item)


        # Filter documents with similarity score > similarity_threshold
        results = {"results":[]}
        
        # Filter indices based on similarity threshold
        indices_above_threshold = top_indices[cos_similarities[top_indices] > similarity_threshold]
        
        # Retrieve filtered items directly from DataFrame
        for index in indices_above_threshold:
            filtered_item = {
                'item_title': self.data.at[index, 'title'],
                'item_translated_title': self.data.at[index, 'translated_title'],
                'item_headline': self.data.at[index, 'headline'],
                'item_translated_headline': self.data.at[index, 'translated_headline'],
                'item_objectives': self.data.at[index, 'objectives_summary'],
                'item_translated_objectives': self.data.at[index, 'translated_objectives_summary'],
                'item_score': cos_similarities[index]
            }
            results["results"].append(filtered_item)
        return results

In [19]:
model = SBERTModel(df)

In [20]:
model.fit_transform()

In [21]:
# Save the model to a file using pickle
with open('model\sbert_model.pxl', 'wb') as file:
    pickle.dump(model, file)

In [61]:
query = "basics of calesthenics"

In [62]:
a = model.semantic_search(query, top_n=5, similarity_threshold=0.7)

(111018,)


In [63]:
print('\nResult: ')
item_names = [item['item_translated_title'] for item in a['results']]
item_os = [item['item_translated_objectives'] for item in a['results']]
item_head = [item['item_translated_headline'] for item in a['results']]
item_score = [item['item_score'] for item in a['results']]

for i in range(len(a['results'])):
    print(f'''{i}, {item_names[i]}
    Headline : {item_head[i]}
    Objectives Summary : {item_os[i]}
    Similarity Score : {item_score[i]}
    ''')
    


Result: 
0, Basics of 5S
    Headline : 5S terminology
    Objectives Summary : ['What is 5S']
    Similarity Score : 0.8630610108375549
    
1, Basics of venture capital
    Headline : To understand the basics of venture capital and private equity
    Objectives Summary : ['Basics of venture capital']
    Similarity Score : 0.7879074215888977
    
2, Perspective drawing
    Headline : theory and practice
    Objectives Summary : ['Draw correctly in the perspective. Theory and practice.']
    Similarity Score : 0.7400009632110596
    
3, CHINESAY: EASY CHINESE
    Headline : Elementary
    Objectives Summary : ['Primary Chinese', 'Primary Chinese', 'Interesting Chinese Knowledge']
    Similarity Score : 0.7386841177940369
    
4, Fundamentals of Insurance - 2
    Headline : Fundamentals of Insurance - 2
    Objectives Summary : ['Fundamentals of Insurance']
    Similarity Score : 0.7302720546722412
    
5, Fundamentals of Insurance - 1
    Headline : Fundamentals of Insurance - 1
    

## Load the model

In [32]:
with open('model\sbert_model.pxl', 'rb') as file:
    loaded_model = pickle.load(file)

In [83]:
query = 'what is bitcoin'
top_results = loaded_model.semantic_search(query)

(111018,)


In [84]:
print('Result: ')
item_names = [item['item_translated_title'] for item in top_results['results']]
item_os = [item['item_translated_objectives'] for item in top_results['results']]
item_score = [item['item_score'] for item in top_results['results']]

for i in range(5):
    print(f'''{i}, {item_names[i]}
    Objectives Summary : {item_os[i]}
    Similarity Score : {item_score[i]}
    ''')
    

Result: 
0, Blockchain Technology - Bitcoin Fundamentals
    Objectives Summary : ['Fundamentals of Bitcoin']
    Similarity Score : 0.8316653966903687
    
1, ECO21: The Bitcoin Standard
    Objectives Summary : ['Economics of bitcoin']
    Similarity Score : 0.7819472551345825
    
2, The complete Cryptocurrency trading course A to Z in 2021
    Objectives Summary : ['How to buy Bitcoin/other cryptocurrencies', 'How to use Tradingview', 'How to use Binance cryptocurrency exchange']
    Similarity Score : 0.7628757357597351
    
3, Online Bitcoin Course - How does it work?
    Objectives Summary : ['How Bitcoin differs from (the policy of) the euro', 'How Bitcoin differs from payments via banks', 'what is the importance of the miners and nodes']
    Similarity Score : 0.7408488392829895
    
4, WEBSITE 101: Create Webpages and Website
    Objectives Summary : ['Website that generate revenue']
    Similarity Score : 0.7351107001304626
    


In [73]:
money_in_description = df['description'].str.contains('money')
# Filter DataFrame berdasarkan hasil pencarian kata "money"
money_data = df[money_in_description][['category']]


In [75]:
money_data[money_data['category'] == 'finance']

Unnamed: 0,category
78565,finance
78588,finance
78596,finance
78666,finance
78679,finance
...,...
87782,finance
87786,finance
87790,finance
87804,finance


In [76]:
df.iloc[78565]['description']

'Cryptocurrency Investment Course 2022: Fund your Retirement! Complete A-Z Guide to Bitcoin, Blockchain, Ethereum, Altcoins, Metaverse, NFT, DeFi & Meme coins for your retirement! [\'Learn everything you need to know about Cryptocurrency and go from Beginner to Advanced in 10 hours!\', \'Learn how to exploit money-making narratives such as Metaverse, NFT, DeFi, Launchpads, & Meme coins\', "Learn the best of what I\'ve learnt and done to be 13,000,000% up on my initial investment"] All Levels'

## Non Class Code

In [102]:
device = "cuda" if torch.cuda.is_available() else "cpu"
mdl = SentenceTransformer('distilbert-base-nli-mean-tokens', device=device)
preprocessor = Preprocessor()

In [104]:
course_embeddings = mdl.encode(df['preprocessed_description_stemmed'].tolist()) 

In [105]:
course_embeddings

array([[-0.4768942 , -0.39215064,  0.9613717 , ..., -0.13957281,
         0.37907663, -0.9578005 ],
       [-0.8194377 , -0.13334247,  0.9758199 , ..., -0.08484241,
         0.18593004, -1.2146381 ],
       [-0.5468608 , -0.21927261,  0.7769774 , ..., -0.44310072,
         0.7263519 , -1.0151007 ],
       ...,
       [-0.55172014, -0.27746943,  0.58747834, ..., -0.01891056,
         0.30784747, -0.5556028 ],
       [-0.64131   , -0.04391116,  0.980867  , ..., -0.10511277,
         0.2070878 , -0.5190067 ],
       [-0.16775839, -0.35063952,  0.95692897, ...,  0.37000546,
         0.11017403, -0.51585865]], dtype=float32)

In [122]:
query = 'learning how to be an entrepreneur'
preprocessed_query = preprocessor.preprocessing_with_stemming(query)
query_embedding = mdl.encode(preprocessed_query)

In [123]:
query_embedding

array([-1.36326075e+00, -2.86050346e-02,  2.37084031e-01, -1.93290973e+00,
        1.92263752e-01, -1.00587916e+00,  6.71310246e-01, -5.28989255e-01,
        1.35767102e-01, -7.64531791e-01, -1.36750191e-02,  7.63123751e-01,
        1.99712217e-01,  1.41949445e-01,  1.47721797e-01, -1.05926943e+00,
        9.72206667e-02, -2.32496411e-01, -1.10530138e+00, -8.01972389e-01,
        3.93903166e-01,  1.54947534e-01, -2.60417014e-01,  7.08105683e-01,
        7.34101087e-02, -2.79242456e-01, -5.24252504e-02, -2.20675856e-01,
        3.82098436e-01,  3.51612568e-01, -2.18476549e-01,  3.17307621e-01,
        4.44033802e-01, -1.57705918e-01, -1.53170764e-01, -1.63975030e-01,
        1.86067611e-01, -1.22920856e-01,  3.79963703e-02, -6.73757959e-03,
        2.77976573e-01,  5.43610573e-01,  6.35683015e-02,  1.11002803e+00,
       -4.01353866e-01, -6.22704446e-01, -1.21765232e+00,  2.89647788e-01,
       -1.09471536e+00, -3.25963907e-02,  4.46626872e-01, -5.81927717e-01,
        2.89539665e-01, -

In [109]:
query_embedding.shape

(768,)

In [110]:
query_embedding.ndim

1

In [20]:
a = query_embedding
a = query_embedding.unsqueeze(0)

AttributeError: 'numpy.ndarray' object has no attribute 'unsqueeze'

In [17]:
course_embeddings.shape

(111018, 768)

In [18]:
course_embeddings.ndim

2

In [39]:
type(course_embeddings)

numpy.ndarray

In [124]:
# Assuming query_embeddings and course_embeddings are your arrays
# query_embeddings shape: (768,)
# course_embeddings shape: (111018, 768)

# Convert numpy arrays to PyTorch tensors
query_tensor = torch.tensor(query_embedding, dtype=torch.float32)
course_tensor = torch.tensor(course_embeddings, dtype=torch.float32)

# Reshape query tensor to have shape (1, 768)
query_tensor = query_tensor.unsqueeze(0)

# Compute cosine similarity using PyTorch
cos_similarities = torch.nn.functional.cosine_similarity(course_tensor, query_tensor, dim=1)

# Now cos_similarities will be a 1D tensor containing cosine similarity values for each course
# cos_similarities.shape will be (111018,)


In [125]:
cos_similarities.shape

torch.Size([111018])

In [126]:
# Compute basic statistics
mean_value = torch.mean(cos_similarities)
median_value = torch.median(cos_similarities)
min_value = torch.min(cos_similarities)
max_value = torch.max(cos_similarities)

# Print or use the computed statistics
print("Mean value:", mean_value.item())
print("Median value:", median_value.item())
print("Minimum value:", min_value.item())
print("Maximum value:", max_value.item())

Mean value: 0.40887385606765747
Median value: 0.41382133960723877
Minimum value: -0.0008498262614011765
Maximum value: 0.7581834197044373


In [115]:
top_indices = cos_similarities.argsort(descending=True)

In [116]:
top_indices

tensor([81336, 70910,  1544,  ..., 23787, 23306, 21553])

In [117]:
cos_similarities[81336]

tensor(0.7849)

In [146]:
np.array(cos_similarities).shape

(111018,)

In [118]:
m = np.array(cos_similarities)

In [121]:
m.max()

0.78488356

In [44]:
query_tensor.shape

torch.Size([768])

In [45]:
query_tensor.ndim

1

In [46]:
course_tensor.shape

torch.Size([111018, 768])

In [47]:
course_tensor.ndim

2

In [48]:
temp_cos = util.pytorch_cos_sim(course_tensor, query_tensor)

In [49]:
temp_cos

tensor([[0.4451],
        [0.3135],
        [0.4650],
        ...,
        [0.4211],
        [0.4863],
        [0.4699]])

In [50]:
cos_similarities = torch.nn.functional.cosine_similarity(course_tensor, query_tensor, dim=1)

In [51]:
cos_similarities.shape

torch.Size([111018])

In [53]:
cos_similarities

tensor([0.4451, 0.3135, 0.4650,  ..., 0.4211, 0.4863, 0.4699])

In [63]:
import torch

cos_similarities = torch.tensor([0.9, 0.5, 0.7, 0.3])
top_indices = cos_similarities.argsort(descending=True)
print(top_indices)  # This would print tensor([0, 2, 1, 3])
print(np.array(top_indices))
a = np.array(top_indices)

tensor([0, 2, 1, 3])
[0 2 1 3]


In [66]:
for i in a:
    print("i : ", i)   
    print(a[i])     

i :  0
0
i :  2
1
i :  1
2
i :  3
3


In [74]:
df.iloc[0]

Unnamed: 0                                                                            0
course_id                                                                        655608
url                                           /course/productivity-and-time-management/
title                                 Productivity and Time Management for the Overw...
translated_title                      Productivity and Time Management for the Overw...
is_paid                                                                            True
instructors                                                            ['Josh Paulsen']
translated_instructors                                                 ['Josh Paulsen']
image_480x270                         https://img-b.udemycdn.com/course/480x270/6556...
published_title                                        productivity-and-time-management
headline                              A complete productivity and time management co...
translated_headline             

In [None]:
results = {"results":[]}
for index in top_indices:
    similarity_score = cos_similarities[index]
    filtered_course = {}
    if similarity_score > similarity_threshold:
        filtered_item['item_title'] = df.iloc[0]['title']
        filtered_item['item_headline'] =  df.iloc[0]['headline']
        filtered_item['item_objectives'] = df.iloc[0]['objectives_summary']
        filtered_item['item_score'] = similarity_score
        results["results"].append(filtered_item)

In [55]:
# Sort documents based on similarity
top_documents_indices = cos_similarities.argsort(descending=True)[:10]
# Filter documents with similarity score > similarity_threshold
results = {"results":[]}
for index in top_documents_indices:
    similarity_score = cos_similarities[0][index]
    filtered_item = {}
    if similarity_score > similarity_threshold:
        filtered_item["item_id"] = self.data.iloc[index]['id']
        filtered_item["item_name"] = self.data.iloc[index]['fullname']
        filtered_item["score"] = similarity_score
        results["results"].append(filtered_item)

In [56]:
top_documents_indices

tensor([81336, 70910,  1544,  8953,  1531, 85276, 47200, 47771, 70334, 69997])

In [35]:
# Compute basic statistics
mean_value = torch.mean(sorted_indices.float())
median_value = torch.median(sorted_indices.float())
min_value = torch.min(sorted_indices.float())
max_value = torch.max(sorted_indices.float())

# Print or use the computed statistics
print("Mean value:", mean_value.item())
print("Median value:", median_value.item())
print("Minimum value:", min_value.item())
print("Maximum value:", max_value.item())

Mean value: 55508.50390625
Median value: 55508.0
Minimum value: 0.0
Maximum value: 111017.0


In [37]:
type(cos_similarities)


torch.Tensor

In [38]:
# Compute basic statistics
mean_value = torch.mean(cos_similarities)
median_value = torch.median(cos_similarities)
min_value = torch.min(cos_similarities)
max_value = torch.max(cos_similarities)

# Print or use the computed statistics
print("Mean value:", mean_value.item())
print("Median value:", median_value.item())
print("Minimum value:", min_value.item())
print("Maximum value:", max_value.item())

Mean value: 0.414038747549057
Median value: 0.42097917199134827
Minimum value: -0.0338449701666832
Maximum value: 0.7848835587501526


In [None]:
# # Check for duplicate values in a specific column
# duplicate_values_in_column = df[df['column_name'].duplicated()]

# # Check for duplicate values in multiple columns
# duplicate_values_in_multiple_columns = df[df.duplicated(subset=['column1', 'column2'])]

In [89]:
duplicate_values_in_column = df[df['title'].duplicated()]
duplicate_values_in_column

Unnamed: 0.1,Unnamed: 0,course_id,url,title,translated_title,is_paid,instructors,translated_instructors,image_480x270,published_title,...,objectives_summary,translated_objectives_summary,is_recently_published,last_update_date,content_info,category,char_count,description,preprocessed_description_stemmed,preprocessed_description_unstemmed
2581,2581,674718,/course/communication-skills-for-beginners/,Communication Skills for Beginners,Communication Skills for Beginners,True,['Mayur Bardolia'],['Mayur Bardolia'],https://img-c.udemycdn.com/course/480x270/6747...,communication-skills-for-beginners,...,['Know what is communication and what is purpo...,['Know what is communication and what is purpo...,False,2015-11-24,4 total hours,personal_development,295.0,Communication Skills for Beginners Everything ...,commun skill beginn everyth want know exactli ...,communication skills beginners everything want...
2990,2990,1718908,/course/kundalini-awakening/,Kundalini Awakening,Kundalini Awakening,True,['Erik Smith'],['Erik Smith'],https://img-c.udemycdn.com/course/480x270/1718...,kundalini-awakening,...,['Be able to awaken the Kundalini inside of th...,['Be able to awaken the Kundalini inside of th...,False,2018-07-03,39 total mins,personal_development,51.0,Kundalini Awakening How To Unleash The Powerfu...,kundalini awaken unleash power energi insid tr...,kundalini awakening unleash powerful energy in...
3256,3256,1188252,/course/anger-management-v/,Anger Management,Anger Management,True,['TBAE Team Building and Events'],['TBAE Team Building and Events'],https://img-c.udemycdn.com/course/480x270/1188...,anger-management-v,...,"['Understand the Anger Emotion', 'Know Ways of...","['Understand the Anger Emotion', 'Know Ways of...",False,2017-05-17,1 total hour,personal_development,104.0,Anger Management Everyone experiences Anger. I...,anger manag everyon experi anger deal matter u...,anger management everyone experiences anger de...
3720,3720,2640114,/course/agile-leadership-v/,Agile Leadership,Agile Leadership,True,['Kaliym Islam'],['Kaliym Islam'],https://img-c.udemycdn.com/course/480x270/2640...,agile-leadership-v,...,['How to assess your current level of leadersh...,['How to assess your current level of leadersh...,False,2020-03-19,1 total hour,personal_development,138.0,Agile Leadership Four Values to Help You Lead ...,agil leadership four valu help lead environ ch...,agile leadership four values help lead environ...
4378,4379,2245476,/course/life-purpose-discovery/,Discover Your Life Purpose,Discover Your Life Purpose,True,['Ali Abidin'],['Ali abidin'],https://img-b.udemycdn.com/course/480x270/2245...,life-purpose-discovery,...,['Explore your 3 clues to discover your life p...,['Explore your 3 clues to discover your life p...,False,2019-05-13,2.5 total hours,personal_development,194.0,Discover Your Life Purpose Practical Steps In ...,discov life purpos practic step discov life pu...,discover life purpose practical steps discover...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110289,110318,3268804,/course/affinityphoto-fur-einsteiger/,Affinity Photo für Einsteiger,Affinity photo for beginners,True,['Nicola Lederer'],['Nicola Leader'],https://img-c.udemycdn.com/course/480x270/3268...,affinityphoto-fur-einsteiger,...,['Die Grundlagen der Fotobearbeitung in Affini...,['The basics of photo editing in affinity phot...,False,2020-06-25,2 total hours,photography,,Affinity photo for beginners Start now with im...,affin photo beginn start imag edit basic photo...,affinity photo beginners start image editing b...
110600,110629,3736602,/course/after-effects-desde-cero-e/,After Effects desde Cero,After Effects from scratch,True,['Francisco Cerda Cruz'],['Francisco Cerda Cruz'],https://img-c.udemycdn.com/course/480x270/3736...,after-effects-desde-cero-e,...,['Los Recursos basicos para manipular after ef...,['Basic resources for manipulating After Effec...,False,2021-08-17,5 total hours,photography,,After Effects from scratch After Basic Effects...,effect scratch basic effect basic resourc mani...,effects scratch basic effects basic resources ...
110759,110788,3639350,/course/adobe-premiere-pro-cc-essentials-train...,Adobe Premiere Pro CC – Essentials Training Co...,Adobe Premiere Pro CC – Essentials Training Co...,True,['Noman Shahid'],['Noman Shahid'],https://img-c.udemycdn.com/course/480x270/3639...,adobe-premiere-pro-cc-essentials-training-cour...,...,['How to add import video your Premiere Pro Pr...,['How to add import video your Premiere Pro Pr...,False,2020-11-16,1 total hour,photography,,Adobe Premiere Pro CC – Essentials Training Co...,adob premier pro cc essenti train cours start ...,adobe premiere pro cc essentials training cour...
110785,110814,4439592,/course/formation-videoscribe-juristudes/,Formation videoscribe,Formation videoscribe,True,['Franklin Lombi Mufuri'],[''frilnklin lodget,https://img-c.udemycdn.com/course/480x270/4439...,formation-videoscribe-juristudes,...,['Apprendre à utiliser le logiciel Videoscribe...,['Learn to use Videoscibe software to create s...,True,2022-02-13,2 total hours,photography,,Formation videoscribe The complete guide to cr...,format videoscrib complet guid creat anim lear...,formation videoscribe complete guide create an...


In [92]:
df[df['title'] == 'Communication Skills for Beginners']

Unnamed: 0.1,Unnamed: 0,course_id,url,title,translated_title,is_paid,instructors,translated_instructors,image_480x270,published_title,...,objectives_summary,translated_objectives_summary,is_recently_published,last_update_date,content_info,category,char_count,description,preprocessed_description_stemmed,preprocessed_description_unstemmed
607,607,1508306,/course/developing-the-communication-skills/,Communication Skills for Beginners,Communication Skills for Beginners,True,['Ganesha Pandian N'],['Ganesha Pandian n'],https://img-b.udemycdn.com/course/480x270/1508...,developing-the-communication-skills,...,['learn new techniques to develop your communi...,['learn new techniques to develop your communi...,False,2019-09-24,2 total hours,personal_development,129.0,Communication Skills for Beginners Basic tips ...,commun skill beginn basic tip techniqu develop...,communication skills beginners basic tips tech...
2581,2581,674718,/course/communication-skills-for-beginners/,Communication Skills for Beginners,Communication Skills for Beginners,True,['Mayur Bardolia'],['Mayur Bardolia'],https://img-c.udemycdn.com/course/480x270/6747...,communication-skills-for-beginners,...,['Know what is communication and what is purpo...,['Know what is communication and what is purpo...,False,2015-11-24,4 total hours,personal_development,295.0,Communication Skills for Beginners Everything ...,commun skill beginn everyth want know exactli ...,communication skills beginners everything want...
63521,63532,1551970,/course/communication-skills-for-beginners-d/,Communication Skills for Beginners,Communication Skills for Beginners,True,"['TJ Walker', 'Media Training Worldwide Digital']","['TJ Walker', 'Media Training Worldwide Digital']",https://img-b.udemycdn.com/course/480x270/1551...,communication-skills-for-beginners-d,...,['Communicate effectively to individuals and s...,['Communicate effectively to individuals and s...,False,2022-01-25,31 total hours,business,,Communication Skills for Beginners Start Build...,commun skill beginn start build commun skill c...,communication skills beginners start building ...
