In [None]:
import string
import pandas as pd
import numpy as np
import re
import matplotlib as plt
import seaborn as snb


In [None]:
# Read the labelled corpus from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer='Language Detection.csv')
df.head(n=5)

Unnamed: 0,Text,Language
0,"Nature, in the broadest sense, is the natural...",English
1,"""Nature"" can refer to the phenomena of the phy...",English
2,"The study of nature is a large, if not the onl...",English
3,"Although humans are part of nature, human acti...",English
4,[1] The word nature is borrowed from the Old F...,English


## Cleaning up

In [None]:
# Translation table that deletes every ASCII punctuation character; built once
# so each call is a single pass over the text instead of one pass per symbol.
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def remove_pun(text):
    """Return ``text`` lower-cased with ASCII punctuation removed.

    Only the characters listed in ``string.punctuation`` are stripped;
    non-ASCII punctuation (for example the Arabic comma) and digits are
    left in place.

    Parameters
    ----------
    text : str
        Raw sentence to normalise.

    Returns
    -------
    str
        The cleaned, lower-cased sentence.
    """
    return text.translate(_PUNCT_TABLE).lower()

In [None]:
# Small sample with mixed case and trailing punctuation for a quick sanity check.
testing = 'Sathvik is a good Boy!!'

In [None]:
# Run the cleaner on the sample sentence and display the result.
remove_pun(text=testing)

'sathvik is a good boy'

In [None]:
# Normalise every sentence (punctuation stripped, lower-cased) and write the
# result back to the same column, then display the frame.
df['Text'] = df['Text'].map(remove_pun)
df

Unnamed: 0,Text,Language
0,nature in the broadest sense is the natural p...,English
1,nature can refer to the phenomena of the physi...,English
2,the study of nature is a large if not the only...,English
3,although humans are part of nature human activ...,English
4,1 the word nature is borrowed from the old fre...,English
...,...,...
10332,ನಿಮ್ಮ ತಪ್ಪು ಏನು ಬಂದಿದೆಯೆಂದರೆ ಆ ದಿನದಿಂದ ನಿಮಗೆ ಒ...,Kannada
10333,ನಾರ್ಸಿಸಾ ತಾನು ಮೊದಲಿಗೆ ಹೆಣಗಾಡುತ್ತಿದ್ದ ಮಾರ್ಗಗಳನ್...,Kannada
10334,ಹೇಗೆ ನಾರ್ಸಿಸಿಸಮ್ ಈಗ ಮರಿಯನ್ ಅವರಿಗೆ ಸಂಭವಿಸಿದ ಎಲ...,Kannada
10335,ಅವಳು ಈಗ ಹೆಚ್ಚು ಚಿನ್ನದ ಬ್ರೆಡ್ ಬಯಸುವುದಿಲ್ಲ ಎಂದು ...,Kannada


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Select features and labels by column name, matching the df['Text'] access
# used when cleaning, so the selection does not depend on column order.
X = df['Text']
Y = df['Language']

In [None]:
# Hold out 30% of the rows for evaluation. A fixed seed makes the split (and
# therefore the reported metrics) reproducible across runs; stratifying on the
# label keeps each language's share the same in both partitions.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=.3, random_state=42, stratify=Y
)

8470            däremot tolereras inte avsiktlig vandalism
438      142 articles available in more than one langua...
5643                     βιβλιογραφία είναι αμφισβητούμενο
2891     outras razões pelas quais esta mensagem pode a...
3220     eram seres mágicos algo que a mãe de narcisse ...
                               ...                        
4475                                     was niet in staat
3092                                       eu tenho que ir
9035     54 في حالات معينة، يُسمح لجميع المحررين بإرسال...
2781     153 as solicitações de página são servidas por...
10303    ಆ ದಿನ ಇಬ್ಬರು ಸ್ನೇಹಿತರು ನಾರ್ಸಿಸಸ್ ತಾಯಿಯನ್ನು ಭೇಟ...
Name: Text, Length: 7235, dtype: object

In [None]:
from sklearn  import feature_extraction

In [None]:
# TF-IDF over character unigrams and bigrams.
vec = feature_extraction.text.TfidfVectorizer(
    analyzer='char',
    ngram_range=(1, 2),
)

In [None]:
from sklearn import pipeline
from sklearn import linear_model

In [None]:
# Chain the vectoriser and a logistic-regression classifier into one estimator.
model_p = pipeline.Pipeline(
    steps=[
        ('vec', vec),
        ('clf', linear_model.LogisticRegression()),
    ]
)

In [None]:
# Fit the vectoriser and the classifier on the training partition.
model_p.fit(X=X_train, y=Y_train)

In [None]:
# Predict a language label for every held-out sentence.
pred = model_p.predict(X=X_test)

In [None]:
from sklearn import metrics

In [None]:
# Held-out accuracy, expressed as a percentage.
100 * metrics.accuracy_score(y_true=Y_test, y_pred=pred)

97.45325596389426

In [None]:
# Per-language confusion counts on the held-out set (rows: true, columns: predicted).
metrics.confusion_matrix(y_true=Y_test, y_pred=pred)

array([[146,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0],
       [  0, 137,   2,   3,   1,   1,   0,   0,   0,   0,   0,   0,   0,
          0,   7,   0,   1],
       [  0,   2, 150,   3,   2,   1,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0],
       [  0,   1,   0, 423,   2,   0,   0,   0,   3,   0,   0,   0,   0,
          3,   0,   0,   0],
       [  0,   0,   0,   0, 316,   0,   0,   0,   2,   0,   0,   1,   0,
          0,   0,   0,   1],
       [  0,   0,   2,   1,   1, 126,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0, 107,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,  22,   0,   0,   0,   0,   0,
          0,   0,   0,   0],
       [  0,   1,   0,   1,   1,   0,   0,   0, 209,   0,   0,   0,   0,
          2,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,  99,   0,   0,   0,
         

In [None]:
# Clean the sample with the same remove_pun() that was applied to the training
# text, so the model receives input in the form it was fitted on.
sample_text = "ಶಿಕ್ಷಣಕ್ಕೆ ಪ್ರತಿಯೊಬ್ಬನಿಗೂ ಹಕ್ಕುಂಟು ಶಿಕ್ಷಣವು ಕೊನೆಯ ಪಕ್ಷಪ್ರಾ ಥಮಿಕ ಹಾಗೂ ಮೂಲ ದರ್ಚೆಗಳಲ್ಲಿ ಧರ್ಮಾರ್ಥವಾಗಿರಬೇಕು. ಪ್ರಾಥಮಿಕ ಶಿಕ್ಷಣವು ಕಡ್ಡಾ ಯವಾಗಿರಬೇಕು. ಶಿಲ್ಪ ಶಿಕ್ಷಣ ಹಾಗೂ ವೃತ್ತಿಶಿಕ್ಷಣಗಳು ಸಾಮಾನ್ಯ ವಾಗಿ ಒದಗಿಸಲಾಗುವುವು. ಮತ್ತು ಉಚ್ಚ ಶಿಕ್ಷಣವು ಯೋಗ್ಯತೆಯ ಮೇಲೆ ಸರ್ವರಿಗೂ ಸಮಾನವಾಗಿ ದೊರೆಯಲಾಗುವುದು."
model_p.predict([remove_pun(sample_text)])

array(['Kannada'], dtype=object)

In [None]:
import pickle

In [None]:
# Persist the fitted pipeline; the context manager closes the file even if
# pickling raises.
with open('model.pckl', 'wb') as new_file:
    pickle.dump(model_p, new_file)

# Create (or truncate) app.py and close the handle straight away instead of
# leaving it open. NOTE: mode 'w' empties any existing app.py.
with open('app.py', 'w'):
    pass

<_io.TextIOWrapper name='app.py' mode='w' encoding='UTF-8'>

In [None]:
import os

# List the working directory to check which files are present.
os.listdir('.')

['.config', 'Language Detection.csv', 'model.pkl', 'model.pckl', 'sample_data']

In [None]:
# Install Streamlit into the kernel's own environment (%pip rather than !pip),
# pinned to the version this notebook was run with, with quiet output.
%pip install -q streamlit==1.20.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting streamlit
  Downloading streamlit-1.20.0-py2.py3-none-any.whl (9.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting validators>=0.2
  Downloading validators-0.20.0.tar.gz (30 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting watchdog
  Downloading watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 KB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Collecting pympler>=0.9
  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 KB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython!=3.1.19
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184

In [None]:
# Start the Streamlit app in the background (`&`) and open a localtunnel on port 8501.
# NOTE(review): app.py is created empty earlier in this notebook — presumably it is
# filled in by hand before this cell runs; confirm.
!streamlit run app.py & npx localtunnel --port 8501

[##................] - fetchMetadata: sill resolveWithNewModule is-fullwidth-co[0m[K
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[K[?25h[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.184.50.26:8501[0m
[0m
[34m  Stopping...[0m
^C


In [None]:
# Install the localtunnel CLI globally.
# NOTE(review): this cell sits after the cell that invokes `npx localtunnel`;
# presumably it is meant to run first — confirm the intended order.
!npm install -g localtunnel

[K[?25h/tools/node/bin/lt -> /tools/node/lib/node_modules/localtunnel/bin/lt.js
+ localtunnel@2.0.2
added 22 packages from 22 contributors in 1.575s
