#### Install requirements

In [9]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import re

# Donnees
data = [
    ("Ce produit est genial", "positif"),
    ("Je suis tres decu", "negatif"),
    ("Qualite excellente", "positif"),
    ("Service horrible", "negatif"),
    ("Parfait pour mes besoins", "positif"),
    ("Catastrophique", "negatif"),
    ("Superbe qualite", "positif"),
    ("A eviter absolument", "negatif")
]

# 1. Pretraitement
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text

texts = [preprocess(t) for t, _ in data]
labels = [l for _, l in data]

# 2. TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)

# 3. Division train/test
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.4, random_state=2
)

# 4. Entrainement
model = MultinomialNB()
model.fit(X_train, y_train)

# 5. Evaluation
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Test sur nouvelle phrase
new_text = ["Ce service est horrible"]
new_vec = vectorizer.transform([preprocess(new_text[0])])
prediction = model.predict(new_vec)
print(f"Prediction: {prediction[0]}")
#\end{lstlisting}


              precision    recall  f1-score   support

     negatif       0.25      1.00      0.40         1
     positif       0.00      0.00      0.00         3

    accuracy                           0.25         4
   macro avg       0.12      0.50      0.20         4
weighted avg       0.06      0.25      0.10         4

Prediction: negatif


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [10]:
import pandas as pd
import re
import nltk
import spacy
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.snowball import FrenchStemmer

# Télécharger les ressources NLTK
nltk.download('punkt')
nltk.download('stopwords')

# Charger le modèle SpaCy français
nlp = spacy.load("fr_core_news_sm")

[nltk_data] Error loading punkt: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>
[nltk_data] Error loading stopwords: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>


OSError: [E050] Can't find model 'fr_core_news_sm'. It doesn't seem to be a Python package or a valid path to a data directory.

In [4]:
!pip install librosa==0.8.0
!pip install pysoundfile==0.9.0.post1
!pip install unidecode==1.3.4
#!pip install pyopenjtalk 
#==0.2.0
!pip install inflect==5.6.2
!pip install janome==0.4.2
!pip install pyopenjtalk-prebuilt==0.2.0


Collecting pyopenjtalk-prebuilt==0.2.0
  Downloading pyopenjtalk_prebuilt-0.2.0-cp38-cp38-win_amd64.whl.metadata (7.0 kB)
Collecting cython>=0.21.0 (from pyopenjtalk-prebuilt==0.2.0)
  Using cached cython-3.2.0-cp38-cp38-win_amd64.whl.metadata (3.5 kB)
Downloading pyopenjtalk_prebuilt-0.2.0-cp38-cp38-win_amd64.whl (920 kB)
   -------------------------------------- 920.7/920.7 kB 133.0 kB/s eta 0:00:00
Using cached cython-3.2.0-cp38-cp38-win_amd64.whl (2.8 MB)
Installing collected packages: cython, pyopenjtalk-prebuilt
Successfully installed cython-3.2.0 pyopenjtalk-prebuilt-0.2.0


#### Train without pretrained model

In [1]:
# LJ Speech
!pip install monotonic_align
#!pip install -r requirements.txt
#!python train.py -c configs/ljs_base.json -m ljs_base

# VCTK
#python train_ms.py -c configs/vctk_base.json -m vctk_base


Collecting monotonic_align
  Downloading https://files.pythonhosted.org/packages/2e/fc/814cbd78dd57880267355179ef74ba24d12daeb68776221f58072ac70643/monotonic_align-1.0.0.tar.gz
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
Building wheels for collected packages: monotonic-align
  Running setup.py bdist_wheel for monotonic-align: started
  Running setup.py bdist_wheel for monotonic-align: finished with status 'done'
  Stored in directory: C:\Users\akash\AppData\Local\pip\Cache\wheels\41\1c\73\070b74e80dbe00c96df83d61399f62707f13bdecc6f2569798
Successfully built monotonic-align
Installing collected packages: monotonic-align
Successfully installed monotonic-align-1.0.0


  Missing build time requirements in pyproject.toml for monotonic_align from https://files.pythonhosted.org/packages/2e/fc/814cbd78dd57880267355179ef74ba24d12daeb68776221f58072ac70643/monotonic_align-1.0.0.tar.gz#sha256=10bed2cf6f5f3cab716d1d14064cee4c759cd06e0eba63d199306f0c6839ffb1: 'wheel'.
  This version of pip does not implement PEP 517 so it cannot build a wheel without 'setuptools' and 'wheel'.
rdflib 6.3.2 has requirement importlib-metadata<5.0.0,>=4.0.0; python_version >= "3.7" and python_version < "3.8", but you'll have importlib-metadata 6.7.0 which is incompatible.
rdflib 6.3.2 has requirement isodate<0.7.0,>=0.6.0, but you'll have isodate 0.7.2 which is incompatible.
You are using pip version 10.0.1, however version 24.0 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.
Traceback (most recent call last):
  File "train.py", line 23, in <module>
    from models import (
  File "c:\Users\akash\Downloads\FastSpeech2-master\vits_

In [None]:
!python -m pip install --upgrade pip setuptools wheel
python preprocess.py --out_extension cleaned1 --text_index 1 --filelists filelists/ljs_audio_text_train_filelist.txt filelists/ljs_audio_text_val_filelist.txt --text_cleaners english_cleaners3

Collecting pip
  Downloading https://files.pythonhosted.org/packages/8a/6a/19e9fe04fca059ccf770861c7d5721ab4c2aebc539889e97c7977528a53b/pip-24.0-py3-none-any.whl (2.1MB)
Requirement already up-to-date: setuptools in c:\users\akash\appdata\local\programs\python\python37\lib\site-packages (68.0.0)
Requirement already up-to-date: wheel in c:\users\akash\appdata\local\programs\python\python37\lib\site-packages (0.42.0)
Installing collected packages: pip
  Found existing installation: pip 10.0.1
    Uninstalling pip-10.0.1:
      Successfully uninstalled pip-10.0.1
Successfully installed pip-24.0


Cache entry deserialization failed, entry ignored
rdflib 6.3.2 has requirement importlib-metadata<5.0.0,>=4.0.0; python_version >= "3.7" and python_version < "3.8", but you'll have importlib-metadata 6.7.0 which is incompatible.
rdflib 6.3.2 has requirement isodate<0.7.0,>=0.6.0, but you'll have isodate 0.7.2 which is incompatible.
  The script pip3.10.exe is installed in 'C:\Users\akash\AppData\Local\Programs\Python\Python37\Scripts' which is not on PATH.


#### Train with a pretrained model

In [None]:
!python train.py --output_directory=outdir --log_directory=logdir -c tacotron2_statedict.pt --warm_start