# Working with Files in Python

## String Formatter 

In [1]:
# Sample values used by the formatting examples that follow.
name, follower = 'This is Pavan Aditya Kumar Gorrela', '30k'

In [2]:
# First the literal labels, then the values held by the two variables.
print('name follower')
print(f'{name} {follower}')

name follower
This is Pavan Aditya Kumar Gorrela 30k


In [3]:
# (topic, count) pairs used to demonstrate aligned output.
ds = list(zip(
    ['python', 'tensorflow', 'nlp'],
    [50, 100, 200],
))

In [4]:
# Unformatted view: the whole list is printed with its default repr.
print(ds)

[('python', 50), ('tensorflow', 100), ('nlp', 200)]


In [5]:
# One pair per line, separated by a single space (no alignment yet).
for topic, count in ds:
    print(topic, count)

python 50
tensorflow 100
nlp 200


In [6]:
# Pad the first field to 20 characters so the second column lines up.
for topic, count in ds:
    print(f'{topic:<20} {count}')

python               50
tensorflow           100
nlp                  200


In [7]:
# Alignment flags: '>' right, '<' left, '^' centre; the character before the
# flag ('.' here) is used as the fill.
for topic, count in ds:
    print(f'{topic:<20} {count:.>5}')

python               ...50
tensorflow           ..100
nlp                  ..200


## Working with Files 

In [8]:
data = 'this is sentence one'

# 'w' creates the file or truncates an existing one. The context manager
# closes the handle even if write() raises.
with open('data/data.txt', 'w') as file:
    file.write(data)

In [9]:
# Length of `data` (number of characters when it is a string).
len(data)

20

In [10]:
data = '\t this is sentence three'

# 'a' keeps the existing content and writes at the end of the file.
# The context manager closes the handle even if write() raises.
with open('data/data.txt', 'a') as file:
    file.write(data)

In [11]:
data = 1

# 'a' appends to the file and creates it first if it does not exist.
# write() only accepts text, so the number is converted with str().
with open('data/data1.txt', 'a') as file:
    file.write(str(data))

In [12]:
data = [1, 'one', 'this is two', 2.3]

# 'w' creates the file or truncates an existing one.
# Every item is written as text followed by a comma, so the file ends with a
# trailing comma.
with open('data/data2.txt', 'w') as file:
    for d in data:
        file.write(str(d))
        file.write(',')

In [13]:
# Write one item per line; each item is rendered as text and terminated
# with a newline.
data = [1, 'one', 'this is two', 2.3]
with open('data/data3.txt', 'w') as file:
    file.writelines(f'{d}\n' for d in data)

## Working with Read mode 

In [14]:
# Open in read mode. The handle is deliberately kept open: the following
# cells seek/read on it and the last of them closes it.
file = open('data/data3.txt', 'r')

In [15]:
# Rewind first so the read starts at the beginning even after earlier reads.
file.seek(0)
# read() returns the whole content; splitlines() drops the line endings.
file.read().splitlines()

['1', 'one', 'this is two', '2.3']

In [16]:
# Rewind first so the read starts at the beginning even after earlier reads.
file.seek(0)
# readlines() keeps the trailing '\n' on every line.
file.readlines()

['1\n', 'one\n', 'this is two\n', '2.3\n']

In [17]:
# Rewind, keep the lines (without line endings) in `data`, then release the
# handle that was opened for the read examples.
file.seek(0)
data = file.read().splitlines()
file.close()

In [18]:
data

['1', 'one', 'this is two', '2.3']

In [19]:
import ast

# literal_eval only parses Python literals (numbers, strings, lists, ...),
# so text read from a file can never execute code the way eval() would.
ast.literal_eval(data[-1])

2.3

In [20]:
import ast

# Turn every line that is a valid Python literal back into its value
# (e.g. '1' -> 1, '2.3' -> 2.3) and leave everything else as the original
# string. literal_eval is used instead of eval() so file content is never
# executed and plain words cannot resolve to variables of the same name.
for index, d in enumerate(data):
    if not isinstance(d, str):
        # Already converted (the cell was run before): nothing to do.
        continue
    try:
        data[index] = ast.literal_eval(d)
    except (ValueError, SyntaxError):
        # Not a literal, e.g. 'one' or 'this is two': keep the text.
        continue

In [21]:
data

[1, 'one', 'this is two', 2.3]

## Reading and Writing .CSV and .TSV Files with Pandas 

In [None]:
import pandas as pd

In [None]:
# Build a two-column table from (digit, figure) pairs and save it as
# comma-separated text without the row index.
l = [
    (1, 'one'),
    (2, 'two'),
    (3, 'three'),
]
df = pd.DataFrame(data=l, columns=['digit', 'figure'])
df.to_csv('data/digit.csv', sep=',', index=False)

In [None]:
# Read the CSV back; the comma separator is read_csv's default.
pd.read_csv('data/digit.csv')

In [None]:
# A .tsv file is read with the same function; only the separator changes.
pd.read_csv('data/moviereviews.tsv', sep = '\t')

In [None]:
# Same table, tab-separated. `index` is a boolean flag, so pass False
# explicitly (as in the CSV export) rather than relying on None being falsy.
df.to_csv('data/digit.tsv', sep='\t', index=False)

## Reading and Writing .XLSX Files with Pandas

In [None]:
# Write the table to a workbook with a single sheet named 'digit'.
# `index` is a boolean flag, so pass False explicitly instead of None.
df.to_excel('data/digit_sheet.xlsx', index=False, sheet_name='digit')

In [None]:
# NOTE(review): writing to a path is expected to recreate the workbook, so
# this should replace the previous content instead of adding a second sheet.
# Confirm by opening the file. `index` is a boolean flag, so pass False.
df.to_excel('data/digit_sheet.xlsx', index=False, sheet_name='digit1')

In [None]:
# Several sheets in one workbook need a shared ExcelWriter. Using it as a
# context manager saves and closes the file on exit; the separate
# writer.save() call no longer exists in pandas 2.x.
with pd.ExcelWriter('data/digit_sheet.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, index=False, sheet_name='digit1')
    df.to_excel(writer, index=False, sheet_name='digit2')

## Reading and Writing .JSON Files 

JSON (JavaScript Object Notation) is a popular text format for representing structured data. It is commonly used to send and receive data between a server and a web application.

In [None]:
import json

In [None]:
# A plain Python dictionary with string keys and string values.
data_dict = dict(one="1", two="2")

In [None]:
# Show the Python type of the object defined as a dict literal.
type(data_dict)

In [None]:
# The same content written as JSON text: this is a str, not a dict.
data_str = '{"one": "1", "two":"2"}'
type(data_str)

In [None]:
# load()/dump() read from and write to file objects; loads()/dumps() convert from and to strings

In [None]:
# Parse JSON text into Python objects (here: str -> dict).
json.loads(data_str)

In [None]:
# Serialize a Python object to JSON text (here: dict -> str).
json.dumps(data_dict)

In [None]:
# NOTE: data_str is already JSON text, so json.dump() stores it as one
# JSON-encoded string (double encoded). Reading it back with json.load()
# therefore returns a str that needs a further json.loads().
# The context manager closes the handle even if dump() raises.
with open('data/data.json', 'w') as file:
    json.dump(data_str, file)

In [None]:
# Read the JSON document back. The context manager guarantees the file is
# closed; the previous `file.close` (no parentheses) never called the method.
with open('data/data.json', 'r') as file:
    json_data = json.load(file)

In [None]:
# json_data is expected to still be JSON text here (the file was written
# from a string, not a dict), so it is parsed once more to get the dict.
json.loads(json_data)

## Reading Files from URL Links 

https://datahub.io/core/global-temp/r/monthly.json

https://raw.githubusercontent.com/laxmimerit/All-CSV-ML-Data-Files-Download/master/ecommerce.csv


In [None]:
# read_csv is given the URL directly instead of a local path (needs network access).
pd.read_csv('https://raw.githubusercontent.com/laxmimerit/All-CSV-ML-Data-Files-Download/master/ecommerce.csv')

In [None]:
# read_json is given the URL directly instead of a local path (needs network access).
pd.read_json('https://datahub.io/core/global-temp/r/monthly.json')

## Extract Text Data From PDF 

In [None]:
# !pip install PyPDF2

In [None]:
import PyPDF2 as pdf

In [None]:
# PDFs are binary, hence 'rb'. The handle stays open because the reader
# created from it is used by the following cells.
# NOTE(review): no matching file.close() is visible in this notebook.
file = open('data/NLP.pdf', 'rb')

In [None]:
# PdfFileReader was removed in PyPDF2 3.0; PdfReader is its replacement.
reader = pdf.PdfReader(file)

In [None]:
reader

In [None]:
# Exploration aid: print the built-in documentation of the reader object.
help(reader)

In [None]:
# getIsEncrypted() was removed in PyPDF2 3.0; the value is now a property.
reader.is_encrypted

In [None]:
# getDocumentInfo() was removed in PyPDF2 3.0; the document information
# is now exposed as the `metadata` property.
reader.metadata

In [None]:
# getNumPages() was removed in PyPDF2 3.0; count the `pages` sequence instead.
len(reader.pages)

In [None]:
# getPage()/extractText() were removed in PyPDF2 3.0; index `pages` and
# call extract_text() to get the text of the first page.
page1 = reader.pages[0].extract_text()

In [None]:
page1

In [None]:
# Dump the text of every page into one file, one page after another.
# UTF-8 is set explicitly because extracted PDF text can contain characters
# that the platform default encoding cannot represent.
# `pages` / extract_text() replace getNumPages()/getPage()/extractText(),
# which were removed in PyPDF2 3.0.
with open('data/pdf_text.txt', 'w', encoding='utf-8') as file1:
    for pdf_page in reader.pages:
        file1.write(pdf_page.extract_text())
        file1.write('\n')

## Record the Audio and Convert to Text

In [30]:
!pip install SpeechRecognition pyaudio

Defaulting to user installation because normal site-packages is not writeable
Collecting pyaudio
  Downloading PyAudio-0.2.14-cp312-cp312-win_amd64.whl.metadata (2.7 kB)
Downloading PyAudio-0.2.14-cp312-cp312-win_amd64.whl (164 kB)
Installing collected packages: pyaudio
Successfully installed pyaudio-0.2.14


In [31]:
import speech_recognition as sr

In [32]:
# Recognizer instance used by the listen/recognize calls that follow.
r = sr.Recognizer()

In [33]:
# Record one phrase from the default microphone and send it to Google's
# web speech API. Only the library's own failure modes are handled, so
# unrelated errors are no longer hidden behind a generic message.
with sr.Microphone() as source:
    print('Say Something...')
    try:
        # Wait at most 10 seconds for speech to start.
        audio = r.listen(source, timeout=10)
    except sr.WaitTimeoutError:
        # Nothing was said within the timeout.
        audio = None
        print('Sorry. Please try again.')

    if audio is not None:
        try:
            text = r.recognize_google(audio)
        except (sr.UnknownValueError, sr.RequestError):
            # Speech was unintelligible or the service could not be reached.
            print('Sorry. Please try again.')
        else:
            print('You were saying:')
            print(text)

Say Something...
You were saying:
hi thank you for registering 30 days of NLP


## Convert Audio into Text Data

In [37]:
import speech_recognition as sr
import numpy as np
from scipy.io import wavfile

In [None]:
# wavfile.read returns a pair: the sample rate (fs) and the samples (data).
fs, data = wavfile.read('data/harvard.wav')

In [15]:
fs

44100

In [16]:
data

array([[-9.1552734e-05, -9.1552734e-05],
       [-6.1035156e-05, -6.1035156e-05],
       [-6.1035156e-05, -9.1552734e-05],
       ...,
       [-1.2207031e-04, -1.8310547e-04],
       [-1.2207031e-04, -9.1552734e-05],
       [-6.1035156e-05, -9.1552734e-05]], dtype=float32)

In [20]:
# Rescale the samples so the loudest one maps to the int32 maximum.
# The arithmetic is done in float64: in float32 the value 2147483647 rounds
# up to 2147483648, so a positive peak sample would overflow int32 on the
# final cast.
samples = np.asarray(data, dtype=np.float64)
peak = np.abs(samples).max()
if peak == 0:
    # Completely silent input: avoid dividing by zero.
    y = np.zeros(samples.shape, dtype=np.int32)
else:
    y = (samples / peak * np.iinfo(np.int32).max).astype(np.int32)

In [18]:
# Largest value an int32 can hold; used as the target peak when rescaling.
np.iinfo(np.int32).max

2147483647

In [19]:
# Samples divided by the largest absolute sample, i.e. scaled into [-1, 1].
(data/np.abs(data).max())

array([[-0.00018557, -0.00018557],
       [-0.00012372, -0.00012372],
       [-0.00012372, -0.00018557],
       ...,
       [-0.00024743, -0.00037115],
       [-0.00024743, -0.00018557],
       [-0.00012372, -0.00018557]], dtype=float32)

In [21]:
y

array([[-398518, -398518],
       [-265679, -265679],
       [-265679, -398518],
       ...,
       [-531358, -797037],
       [-531358, -398518],
       [-265679, -398518]])

In [22]:
# Save the rescaled samples at the original sample rate.
# NOTE(review): this overwrites the same path the samples were read from,
# so re-running the notebook starts from the converted file, not the
# original recording. Consider writing to a separate output file.
wavfile.write('data/harvard.wav', fs, y)

In [23]:
# Recognizer instance used for the audio-file transcription that follows.
r = sr.Recognizer()

In [25]:
# Transcribe a WAV file with Google's web speech API. Only the library's own
# failure modes are handled, so unrelated errors are no longer hidden.
with sr.AudioFile('data/harvard.wav') as source:
    # NOTE(review): listen() is meant to capture a single phrase and stop at
    # a pause; r.record(source) would read the whole file. Confirm which is
    # intended.
    audio = r.listen(source)

    try:
        text = r.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        # Speech was unintelligible or the service could not be reached.
        print('Sorry. Try again.')
    else:
        print('Working on...')
        print(text)

Working on...
hello everyone how are you I am sure all of you are enjoying this lesson and I would like to also request you to please leave your rating and if you have any doubts you can comment your questions in the question and answer section


## Text to Speech Conversion

In [42]:
# English sentence used as input for the text-to-speech example.
text = 'hello everyone how are you I am sure all of you are enjoying this lesson and I would like to also request you to please leave your rating and if you have any doubts you can comment your questions in the question and answer section'

In [43]:
!pip install gTTS

Defaulting to user installation because normal site-packages is not writeable
Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Installing collected packages: gTTS
Successfully installed gTTS-2.5.4




In [44]:
from gtts import gTTS as g

In [45]:
# Build a text-to-speech object for `text` and save the audio to
# data/pavan.mp3.
obj = g(text = text)
obj.save('data/pavan.mp3')

In [46]:
# Spanish sentence used as input for the second text-to-speech example.
spanish = "Hola, te doy la bienvenida en esta lección. Dale me gusta y suscríbete a este canal. Gracias"

In [47]:
# gTTS uses English unless told otherwise; pass lang='es' so the Spanish
# sentence is spoken with a Spanish voice instead of English pronunciation.
obj = g(text=spanish, lang='es')
obj.save('data/pavan_spanish.mp3')