# IMPORT

In [None]:
import pandas as pd
import numpy as np

#KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.cluster import KMeans

#arbres de décision
from sklearn import tree
from sklearn.tree import plot_tree
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier

#regression
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression

#preprocess
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample

#metrics
from sklearn.metrics import classification_report,precision_recall_fscore_support
from sklearn.metrics import accuracy_score, r2_score
from sklearn.metrics  import silhouette_score
from sklearn.metrics import roc_curve
from sklearn.metrics import f1_score, precision_score,recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix

#dataviz
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

#NLP (text mining)
import nltk
nltk.download('popular')

import spacy
from nltk.stem import SnowballStemmer,PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

#divers
from IPython.core import display as ICD      # ICD.display()
import time

import warnings                              # permet d'ignorer les messages de warnings pour une meilleure visibilité
warnings.filterwarnings('ignore')

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df=pd.read_csv('/content/drive/MyDrive/Projets/Datathon/Datathon Fiverr/CSV/Clean_dataset 2.0.csv')


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 881 entries, 0 to 880
Data columns (total 43 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Address                     881 non-null    object 
 1   seller_link 1               881 non-null    object 
 2   seller_level 1              642 non-null    object 
 3   rating_score 1              823 non-null    float64
 4   ratings_count 1             823 non-null    float64
 5   orders_in_queue 1           275 non-null    float64
 6   collect_count 1             833 non-null    float64
 7   title_1                     881 non-null    object 
 8   description_content 1       879 non-null    object 
 9   metadata_1                  178 non-null    object 
 10  one_liner_1                 704 non-null    object 
 11  user_stats_from_1           836 non-null    object 
 12  user_stats_member_since_1   833 non-null    object 
 13  user_stats_response_time_1  799 non

In [5]:
df['seller_level 1'].unique()

array(['Level 2 Seller', 'Level 1 Seller', 'Top Rated Seller', nan],
      dtype=object)

# METADATA REMOVE TAGS

In [6]:
# vérification du type des éléments de la colonne 'metadata 1'
type(df['metadata_1'][0])

str

In [7]:
# fonction pour nettoyer les balises / tags
import re
def remove_tags(text):
  text=text.lower()
  clean = re.compile('<.*?>')        # compile tous les motifs (tags) entre <> qu'il trouvera dans le texte
  return re.sub(clean,' ', text)    # re.sub(motif à remplacer, remplacé par , data)


In [8]:
# test sur une ligne
remove_tags(df['metadata_1'][0])

'  platform   webflow     specialization   blog  business  education  portfolio  non-profit  forms  saas  portal     supported plugin types   marketing  payment  forum  social media  customer support  shipping  inventory  analytics  video  form  events  music  chat   membership  map  faq  gallery   '

In [9]:
df['metadata_1']=df['metadata_1'].apply(str)

In [10]:
df['metaclean']=df['metadata_1'].apply(lambda x : remove_tags(x))

In [11]:
pd.set_option('max_colwidth', 200)
print(ICD.display(df['metaclean'][:20]))

0       platform   webflow     specialization   blog  business  education  portfolio  non-profit  forms  saas  portal     supported plugin types   marketing  payment  forum  social media  customer suppo...
1       platform   wix     specialization   blog  business  education  portfolio  entertainment  non-profit  wedding     supported plugin types   payment  forum  social media  customer support  inventor...
2                                                                                                                                                                                                         nan
3       platform   wix     specialization   blog  business  education  portfolio  entertainment  non-profit  wedding  podcasting  online communities  forms  crowdfunding  wiki /knowledge  saas  job boar...
4       platform   wix     specialization   blog  business  education  portfolio  entertainment  non-profit  wedding  podcasting  forms  crowdfunding  wiki /knowledge  saas  jo

None


# WORDS OCCURENCES

In [12]:
import nltk
from nltk.corpus import stopwords
stopword = nltk.corpus.stopwords.words('english')
newlist=['platform','specialization','expertise','database type','nan','supported','plugin','types']
stopword.extend(newlist)

from nltk.tokenize import RegexpTokenizer

# Clean ponctuations, stopwords et majuscules
def func_clean(sentence):
	sentence = sentence.lower()
	tokenizer = RegexpTokenizer(r'\w+')
	tokens = tokenizer.tokenize(sentence)
	filtered_words = filter(lambda token: token not in stopword, tokens)
	return " ".join(filtered_words)

In [13]:
df['metaclean']=df['metaclean'].apply(func_clean)
pd.set_option('max_colwidth', 200)
print(ICD.display(df['metaclean'][:20]))

0     webflow blog business education portfolio non profit forms saas portal marketing payment forum social media customer support shipping inventory analytics video form events music chat membership ma...
1                    wix blog business education portfolio entertainment non profit wedding payment forum social media customer support inventory analytics video form events chat membership map faq gallery
2                                                                                                                                                                                                            
3     wix blog business education portfolio entertainment non profit wedding podcasting online communities forms crowdfunding wiki knowledge saas job board portal brochure marketing payment forum social...
4     wix blog business education portfolio entertainment non profit wedding podcasting forms crowdfunding wiki knowledge saas job board portal brochure marketing payment forum

None


## SLICE SUR LES TOP SELLER ET LEVEL 2

In [14]:
df_top_2 = df.loc[(df['seller_level 1']=='Level 2 Seller') | (df['seller_level 1']=='Top Rated Seller')]
df_top_2['seller_level 1'].unique()

array(['Level 2 Seller', 'Top Rated Seller'], dtype=object)

In [15]:
df_top_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 412 entries, 0 to 878
Data columns (total 44 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Address                     412 non-null    object 
 1   seller_link 1               412 non-null    object 
 2   seller_level 1              412 non-null    object 
 3   rating_score 1              386 non-null    float64
 4   ratings_count 1             386 non-null    float64
 5   orders_in_queue 1           160 non-null    float64
 6   collect_count 1             393 non-null    float64
 7   title_1                     412 non-null    object 
 8   description_content 1       412 non-null    object 
 9   metadata_1                  412 non-null    object 
 10  one_liner_1                 380 non-null    object 
 11  user_stats_from_1           401 non-null    object 
 12  user_stats_member_since_1   398 non-null    object 
 13  user_stats_response_time_1  398 non

In [16]:
words_occurences=pd.Series(' '.join(df_top_2.metaclean).split()).value_counts()[:60]
words_occurences

business         100
education         99
portfolio         98
blog              98
entertainment     97
gallery           95
wedding           92
payment           91
media             90
social            90
form              90
profit            89
non               89
map               88
faq               88
video             86
chat              82
events            79
marketing         78
shipping          77
forum             73
analytics         73
membership        72
music             72
inventory         68
support           68
customer          68
podcasting        63
forms             63
online            61
communities       61
crowdfunding      48
brochure          46
portal            44
knowledge         43
wiki              43
wix               40
job               38
board             38
saas              35
squarespace       20
godaddy           15
webflow            9
sitebuilder        7
joomla             3
blogger            3
dreamweaver        2
tokenization 

In [17]:
df_occurences=pd.DataFrame(data=words_occurences.items(),columns=['words','occurences'])

In [18]:
df_top_2.head()

Unnamed: 0,Address,seller_link 1,seller_level 1,rating_score 1,ratings_count 1,orders_in_queue 1,collect_count 1,title_1,description_content 1,metadata_1,one_liner_1,user_stats_from_1,user_stats_member_since_1,user_stats_response_time_1,user_stats_last_delivery,seller_desc_expanded_1,package1_title_1,package1_type_1,package1_price_1,package1_desc_1,package2_title_1,package2_type_1,package2_price_1,package2_desc_1,package3_title_1,package3_type_1,package3_price_1,package3_desc_1,faq1_q_1,faq1_a_1,faq2_q_1,faq2_a_1,faq3_q_1,faq3_a_1,faq4_q_1,faq4_a_1,review1_desc_1,review2_desc_1,review3-desc 1,review1_rating_1,review2_rating_1,review3_rating_1,category_1,metaclean
0,https://www.fiverr.com/westcoastwp/design-and-build-a-responsive-high-performing-website,westcoastwp,Level 2 Seller,5.0,8.0,,141.0,I will create a high converting website in webflow,"Need a professional high converting website for your business?Send us a message to get started.You can expect:Competitive researchUI & prototypingResponsive website built in WebflowSEO-friendly, s...","<li class=""metadata-attribute""><p>Platform</p><ul><li>Webflow</li></ul></li><li class=""metadata-attribute""><p>Specialization</p><ul><li>Blog</li><li>Business</li><li>Education</li><li>Portfolio</l...","Finerfox Media, an independent digital agency based in Los Angeles",,,,,,STARTUP,Basic,1732.0,"Professional single page site, up to 6 sections",ESTABLISHED,Standard,4329.0,Professional 5 page site - CMS - 1 plugin,INDUSTRY LEADER,Premium,6926.0,Professional 10 page site - CMS - ecommerce - 1 plugin,,,,,,,,,"Finerfox Media is hands down the best dev team I've had the pleasure of working with. This project sprung up quickly, but their team was cool and calm and ready to take it on. I've already asked i...",Very professional.,Item as delivered as described.,5.0,5.0,5.0,,webflow blog business education portfolio non profit forms saas portal marketing payment forum social media customer support shipping inventory analytics video form events music chat membership ma...
2,https://www.fiverr.com/yougeen/build-you-a-membership-funnel-in-clickfunnels,yougeen,Level 2 Seller,5.0,41.0,,88.0,I will build you a funnel in clickfunnels,"I'll build and setup a Clickfunnels website for you with a design based on your requirements. No matter if you want a Lead Magnet funnel or a membership funnel, I can do this for you. I can help y...",,"professional, reliable and customer oriented",Germany,02/2015,2.0,504.0,,Clickfunnels Opt-in / Lead Page,Basic,82.24,1 Lead Page with Opt-in Form and Thank you Page,Clickfunnels Sales Funnel,Standard,212.09,Build Sales page with product integration,Clickfunnels Sales + Member Funnel,Premium,298.66,Build Sales page with a member area,Do I need a Clickfunnels subscription for this gig?,"Yes, you need your own Clickfunnels account. If you don't have one yet, just let me know and I can help you.",I don't have any sale copy. Can you help me with that?,"I don't write any sale copy, please provide me all your copies for the website.",How will I receive my funnel?,"I will create the funnel in your Clickfunnels account, right after I'm done with my work, you can start using it.",,,"Eugen is a great guy and super engaged. He is not only doing the job better than expected, he truly takes care of the whole process. I like it when people do not talk about why it's not going to w...",Yougeen helped me a lot with my existing funnel in clickfunnels. He is my hero of the day :-) Thank you for your commitment,Sehr unkompliziert und entgegenkommend,5.0,5.0,5.0,Full Website Creation,
3,https://www.fiverr.com/zenntech/create-professional-website-on-wix-wordpress-shopify-with-responsive-design,zenntech,Level 2 Seller,5.0,56.0,5.0,92.0,"I will develop optimized website on wordpress,shopify, wix with responsiveness","Hi, Fiverr Fans!I am Ramish! Thank You for being here. I specialize in Full Stack Development and Web Design with over 3+ years of experience. I am the Founder & CEO of ZennTech i.e. is a team of ...","<li class=""metadata-attribute""><p>Platform</p><ul><li>Wix</li></ul></li><li class=""metadata-attribute""><p>Specialization</p><ul><li>Blog</li><li>Business</li><li>Education</li><li>Portfolio</li><l...","Expert in WordPress, Wix, Shopify",Pakistan,09/2020,1.0,48.0,"Hello. I'm Ramish Ch the Founder & CEO of ZennTech which is a team of professional designers, developers, animators and writers working together to take IT to the next level. From design to develo...",BASIC,Basic,86.57,One Page Website/Landing Page Development,STANDARD,Standard,324.63,"Professional Website on WordPress, Wix or Shopify upto 5 pages development\n\n\n\n",PREMIUM,Premium,627.61,"Complete/Ecommerce Website on Wix, WordPress or Shopify upto 10 pages development",How much time is required for develpment of complete website?,"Once we have got all our requirements i.e the content including text and images, it should take 2 - 10 days for us to design and build a website for you. This may vary for complex projects. Feel f...",Is our Websites reponsive?,"Yes, we develop responsive Websites which are friendly to all devices i.e mobile, desktop, tab, etc",Which platform is best for website development?,"Well, the answer to this question is quite flexible. It depends upon the nature of the website you want. Feel free to message me I will analyze you requirements and brief you on whats platform is ...",,,"Amazing customer service, was sure to communicate and get the needs of the business plus more. Would highly recommended, without a doubt the best website developer we could have asked for.",Great communication! Ramish also looked at project scope before we started to make sure it can be done. Great suggestions and help throughout.,"Very easy to work with and helped explain what to do, provided good starting information with the quote and then delivered the job good quality on and on time.",5.0,5.0,5.0,Full Website Creation,wix blog business education portfolio entertainment non profit wedding podcasting online communities forms crowdfunding wiki knowledge saas job board portal brochure marketing payment forum social...
4,https://www.fiverr.com/wix_mentor/design-and-redesign-wix-website-professionally,wix_mentor,Level 2 Seller,5.0,95.0,4.0,279.0,I will design wix website and redesign business wix website,"Hello,I am Wix mentor are you looking for someone who can Design Wix website and if you have already a Wix website and want to redesign Wix website don’t worry I am here to assist you I will desi...","<li class=""metadata-attribute""><p>Platform</p><ul><li>Wix</li></ul></li><li class=""metadata-attribute""><p>Specialization</p><ul><li>Blog</li><li>Business</li><li>Education</li><li>Portfolio</li><l...","without problems, we would not grow",Pakistan,05/2020,1.0,2.0,"Hi, My name is Umer, but I love to be called Wix Mentor! I am a professional Wix website designer, Graphic Designer, and SEO specialist having 3+ years of extensive experience. I can design and de...",CONTACT ME BEFORE PLACING THE ORDER,Basic,17.31,Responsive wix website design/redesign+ONE page+free support +unlimited revisions,CONTACT ME BEFORE PLACING THE ORDR.,Standard,77.91,Responsive wix website design/redesign+FIVE page+free support +unlimited revisions,.CONTACT ME BEFORE PLACING THE ORDR,Premium,129.85,Responsive wix website design/redesign+TEN page+free VIP support +unlimited revisions+VIDEO GUIDE,What information is required before starting the work?,"Menu pages names, logo, Share website example links, images, and description (Don’t worry about images if you don’t have any images I can use locality free images from unsplash.com).",Do you design the Wix website from scratch?,"Yes, if you don’t like any template I will design it from starch.",what you give support after project completion?,"Yes, I will give seven days of free support after the completion of work.",Wix vs Wordpress?,We can’t compare Wix vs Wordpress both CMS have their functionalities and features but the main difference is that wix.com provides hosting,@wix_mentor was really the best website designer ever ! This was my first time creating a website so I knew it would be very difficult but wix mentor made the process so easy! They were very willi...,"Thank you so much to the Wix Mentor as they had made my website fully functional for my business, went over and beyond to help, even after the service ended! I am so grateful to have chosen this S...",wix_mentor was outstanding. It was a pleasure working with this professional. Always responsive and addressed my concerns/questions in a timely manner. I look forward to hiring wix_mentor for more...,5.0,5.0,5.0,Full Website Creation,wix blog business education portfolio entertainment non profit wedding podcasting forms crowdfunding wiki knowledge saas job board portal brochure marketing payment forum social media customer sup...
5,https://www.fiverr.com/webgeek_pro/design-customize-and-fix-your-blogger-template,webgeek_pro,Level 2 Seller,5.0,303.0,,443.0,"I will design, customize and fix your blogger template","Hi everyone!If you want to Design, Customize or Fix any issue related to Blogger website, then you're on right Gig. I am perfect person for all your Blogger related issues. I have 3+ years of expe...","<li class=""metadata-attribute""><p>Platform</p><ul><li>Blogger</li></ul></li><li class=""metadata-attribute""><p>Specialization</p><ul><li>Blog</li><li>Business</li><li>Education</li><li>Portfolio</l...",A self starter who loves to code Web,Pakistan,02/2019,1.0,3.0,"Energetic Freelance Web developer, self-starter who loves to code Web. Goal-focused with high UI/UX design & coding skills.\n\nI offer services related to Web Development, Bug Fixing, Design & Red...",Small Fixing or Customization,Basic,8.66,1 Bug Fix or 1 Customization,Complete Fixing or Customization,Standard,43.28,Fix ALL Bugs or Do ALL Customizations,Custom Blogger Template,Premium,86.57,"Create a theme from scratch , Custom layout , Responsive & SEO friendly , 7 days support",,,,,,,,,The seller went above and beyond. Sorted out so many more issues than I had paid for. Service was quick and professional. I will definitely be coming back here for more work. Highly recommended!,Utterly brilliant - super quick Best experience so far and really hope to work with you again. Take care Christi,"Seller was willing to work with specific issues and troubleshooting. The gig was done on a fast pace. Overall, satisfied.",5.0,5.0,4.7,Full Website Creation,blogger blog business education portfolio entertainment non profit wedding social media analytics form chat faq gallery


In [19]:
df_occurences

Unnamed: 0,words,occurences
0,business,100
1,education,99
2,portfolio,98
3,blog,98
4,entertainment,97
5,gallery,95
6,wedding,92
7,payment,91
8,media,90
9,social,90


# OCCURENCES DES MOTS PAR CATEGORIE

In [20]:
from bs4 import BeautifulSoup
import requests

In [21]:
def occurences(meta):
  dico={}
  new_meta = meta.split(sep="<p")
  new_meta.pop(0)
  for item in new_meta:
    #print(item)
    item = "<p" + item
    soup = BeautifulSoup(item)
    for balise in soup:
        for li in balise:
          dico[balise.find('p').text]= [li.text for li in balise.find_all('li') if li.text != '']
  return dico

In [22]:
occ_ = df_top_2['metadata_1'].apply(occurences)
occ_=occ_.to_dict()
dico_ = occ_.values()
df_occ_= pd.DataFrame.from_dict(dico_)
df_occ_.dropna(inplace=True , how="all")

for columns in df_occ_:
  print(columns)
  print(df_occ_[columns].explode(columns).value_counts())
  print("---------------------------------------")

Platform
Wix                  40
Squarespace          20
GoDaddy              15
Webflow               9
SiteBuilder           7
Joomla                3
Blogger               3
Adobe Dreamweaver     2
ClickFunnels          2
Tilda                 1
Weebly                1
Drupal                1
Name: Platform, dtype: int64
---------------------------------------
Specialization
Business              100
Education              99
Blog                   98
Portfolio              98
Entertainment          97
Wedding                92
Non-profit             89
Forms                  63
Podcasting             63
Online Communities     61
Crowdfunding           48
Brochure               46
Portal                 44
Wiki /Knowledge        43
Job Board              38
SaaS                   35
Name: Specialization, dtype: int64
---------------------------------------
Supported plugin types
Gallery             95
Social Media        90
Form                90
Payment             90
FAQ          

In [23]:
df_occ_

Unnamed: 0,Platform,Specialization,Supported plugin types,Token Type,Expertise,Programming Language
0,[Webflow],"[Blog, Business, Education, Portfolio, Non-profit, Forms, SaaS, Portal]","[Marketing, Payment, Forum, Social Media, Customer Support, Shipping, Inventory, Analytics, Video, Form, Events, Music, Chat , Membership, Map, FAQ, Gallery]",,,
2,[Wix],"[Blog, Business, Education, Portfolio, Entertainment, Non-profit, Wedding, Podcasting, Online Communities, Forms, Crowdfunding, Wiki /Knowledge, SaaS, Job Board, Portal, Brochure]","[Marketing, Payment, Forum, Social Media, Customer Support, Shipping, Inventory, Analytics, Video, Form, Events, Music, Chat , Membership, Map, FAQ, Gallery]",,,
3,[Wix],"[Blog, Business, Education, Portfolio, Entertainment, Non-profit, Wedding, Podcasting, Forms, Crowdfunding, Wiki /Knowledge, SaaS, Job Board, Portal, Brochure]","[Marketing, Payment, Forum, Social Media, Customer Support, Shipping, Analytics, Video, Form, Events, Chat , Membership, Map, FAQ, Gallery]",,,
4,[Blogger],"[Blog, Business, Education, Portfolio, Entertainment, Non-profit, Wedding]","[Social Media, Analytics, Form, Chat , FAQ, Gallery]",,,
6,[Wix],"[Blog, Business, Education, Portfolio, Entertainment, Online Communities, Forms, Portal]","[Payment, Social Media, Analytics, Video, Form, Events, Music, Chat , Membership, Map, FAQ, Gallery]",,,
...,...,...,...,...,...,...
396,[SiteBuilder],"[Blog, Business, Education, Portfolio, Entertainment, Non-profit, Wedding, Podcasting, Online Communities, Forms, Crowdfunding, Wiki /Knowledge, Portal]","[Marketing, Payment, Forum, Social Media, Customer Support, Shipping, Inventory, Analytics, Video, Form, Events, Music, Chat , Membership, Map, FAQ, Gallery]",,,
399,[Wix],"[Blog, Business, Education, Portfolio, Entertainment, Non-profit, Wedding, Podcasting, Online Communities, Forms, Crowdfunding, Brochure]","[Marketing, Payment, Forum, Social Media, Customer Support, Shipping, Inventory, Analytics, Video, Form, Events, Music, Chat , Membership, Map, FAQ, Gallery]",,,
400,[Wix],"[Blog, Business, Education, Portfolio, Entertainment, Non-profit, Wedding]","[Payment, Social Media, Shipping, Form, Chat ]",,,
401,[Wix],"[Blog, Business, Education, Portfolio, Entertainment, Non-profit, Wedding, Podcasting, Online Communities, Forms, Crowdfunding, Wiki /Knowledge, SaaS, Job Board, Portal, Brochure]","[Marketing, Payment, Forum, Social Media, Customer Support, Shipping, Inventory, Analytics, Video, Form, Events, Music, Chat , Membership, Map, FAQ, Gallery]",,,


### Specialization

In [24]:
df_specialistation=df_occ_['Specialization']
serie_specialization = df_occ_['Specialization'].explode('Specialization').value_counts()
df_specialization = pd.DataFrame(serie_specialization)
df_specialization.reset_index(inplace=True)
df_specialization.columns=["Specialization","Count"]
df_specialization

Unnamed: 0,Specialization,Count
0,Business,100
1,Education,99
2,Blog,98
3,Portfolio,98
4,Entertainment,97
5,Wedding,92
6,Non-profit,89
7,Forms,63
8,Podcasting,63
9,Online Communities,61


In [25]:
df_specialization.to_csv('df_specialization.csv')
!cp df_specialization.csv "drive/My Drive/"

### Platform

In [28]:
df_platform=df_occ_['Platform']
serie_plateforme = df_occ_['Platform'].explode('Platform').value_counts()
df_plateforme = pd.DataFrame(serie_plateforme)
df_plateforme.reset_index(inplace=True)
df_plateforme.columns=["Platform","Count"]
df_plateforme

Unnamed: 0,Platform,Count
0,Wix,40
1,Squarespace,20
2,GoDaddy,15
3,Webflow,9
4,SiteBuilder,7
5,Joomla,3
6,Blogger,3
7,Adobe Dreamweaver,2
8,ClickFunnels,2
9,Tilda,1


In [None]:
df_plateforme.to_csv('df_platform.csv')
!cp df_platform.csv "drive/My Drive/"

### Supported plugin types

In [27]:
serie_plugin = df_occ_['Supported plugin types'].explode('Supported plugin types').value_counts()
df_plugin = pd.DataFrame(serie_plugin)
df_plugin.reset_index(inplace=True)
df_plugin.columns=["Supported plugin types","Count"]
df_plugin

Unnamed: 0,Supported plugin types,Count
0,Gallery,95
1,Social Media,90
2,Form,90
3,Payment,90
4,FAQ,88
5,Map,88
6,Video,86
7,Chat,82
8,Events,79
9,Marketing,78


In [None]:
df_plugin.to_csv('df_plugin.csv')
!cp df_plugin.csv "drive/My Drive/"

# TFIDF

# Nouvelle section

In [None]:
!pip install texthero

Collecting texthero
  Downloading https://files.pythonhosted.org/packages/1f/5a/a9d33b799fe53011de79d140ad6d86c440a2da1ae8a7b24e851ee2f8bde8/texthero-1.0.9-py3-none-any.whl
Collecting unidecode>=1.1.1
[?25l  Downloading https://files.pythonhosted.org/packages/9e/25/723487ca2a52ebcee88a34d7d1f5a4b80b793f179ee0f62d5371938dfa01/Unidecode-1.2.0-py2.py3-none-any.whl (241kB)
[K     |████████████████████████████████| 245kB 10.8MB/s 
[?25hCollecting nltk>=3.3
[?25l  Downloading https://files.pythonhosted.org/packages/5e/37/9532ddd4b1bbb619333d5708aaad9bf1742f051a664c3c6fa6632a105fd8/nltk-3.6.2-py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 42.7MB/s 
Installing collected packages: unidecode, nltk, texthero
  Found existing installation: nltk 3.2.5
    Uninstalling nltk-3.2.5:
      Successfully uninstalled nltk-3.2.5
Successfully installed nltk-3.6.2 texthero-1.0.9 unidecode-1.2.0


In [None]:
import texthero as hero
df_top_2['tfidf'] = hero.tfidf(df_top_2['metaclean'])

In [None]:
df_tfidf=df_top_2.loc[:,['metaclean','tfidf']]

In [None]:
df_tfidf=df_tfidf.reset_index()

In [None]:
df_tfidf.head()

Unnamed: 0,index,metaclean,tfidf
0,0,webflow blog business education portfolio non profit forms saas portal marketing payment forum social media customer support shipping inventory analytics video form events music chat membership ma...,"[0.0, 0.18314327391215673, 0.0, 0.0, 0.0, 0.1635414999518146, 0.0, 0.0, 0.0, 0.16219450754401143, 0.175413444250721, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18785480123752887, 0.0, 0.0, 0.0, 0.0, 0.162864..."
1,2,,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,3,wix blog business education portfolio entertainment non profit wedding podcasting online communities forms crowdfunding wiki knowledge saas job board portal brochure marketing payment forum social...,"[0.0, 0.15452108213093488, 0.0, 0.0, 0.0, 0.13798273344175055, 0.0, 0.1909158466627351, 0.18031363687088064, 0.13684625313302934, 0.14799929392403768, 0.0, 0.0, 0.16457466028309914, 0.177945705863..."
3,4,wix blog business education portfolio entertainment non profit wedding podcasting forms crowdfunding wiki knowledge saas job board portal brochure marketing payment forum social media customer sup...,"[0.0, 0.16318869402857988, 0.0, 0.0, 0.0, 0.14572265323493366, 0.0, 0.20162496441652286, 0.19042804069653702, 0.14452242388880301, 0.1563010765880502, 0.0, 0.0, 0.0, 0.18792728440246428, 0.0, 0.0,..."
4,5,blogger blog business education portfolio entertainment non profit wedding social media analytics form chat faq gallery,"[0.0, 0.24255278537749098, 0.0, 0.0, 0.0, 0.21659242783413182, 0.5028006240773572, 0.0, 0.0, 0.21480848702420777, 0.23231549041805907, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2156..."


In [None]:
sum(df_tfidf['tfidf'][1])

0.0