Skip to content
This repository was archived by the owner on May 25, 2022. It is now read-only.

Commit 189bb8d

Browse files
committed
Split Files
1 parent b563065 commit 189bb8d

File tree

2 files changed

+58
-0
lines changed

2 files changed

+58
-0
lines changed

projects/Split_File/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Split Files
2+
##### Execute
3+
`python split_files.py <csv/text_file> <split/line_number>`

projects/Split_File/split_files.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import sys
2+
import os
3+
import shutil
4+
import pandas as pd
5+
6+
class Split_Files:
    '''
    Split a CSV or text file into numbered chunk files of a fixed row count.

    Chunks are written to the "file_split" directory (relative to the
    current working directory) as split_file1, split_file2, ... using a
    '.txt' extension when the input name ends in '.txt' and '.csv' otherwise.
    '''

    def __init__(self, filename, split_number):
        '''
        Store the input file name and chunk size and prepare the output directory.

        filename: path of the file to split.
        split_number: number of rows per output file; any value accepted by
            int(), such as a command-line string.

        Raises ValueError when split_number is not an integer or is below 1.

        An existing output directory is deleted and recreated empty.
        '''
        self.file_name = filename
        self.directory = "file_split"
        self.split = int(split_number)
        # Validate before touching the file system so that a bad chunk size
        # never wipes the output of a previous run.
        if self.split < 1:
            raise ValueError(
                f"split_number must be a positive integer, got {split_number!r}"
            )
        if os.path.exists(self.directory):
            shutil.rmtree(self.directory)
        os.mkdir(self.directory)
        self.file_extension = '.txt' if self.file_name.endswith('.txt') else '.csv'
        self.file_number = 1

    def split_data(self):
        '''
        Write the input file out as consecutive chunks of self.split rows.

        The input is parsed with pandas.read_csv without a header row.
        '.txt' chunks are written space-separated and '.csv' chunks
        comma-separated. When the row count is not a multiple of the chunk
        size, the last file holds the leftover rows. self.file_number is
        advanced once for every file written.
        '''
        data = pd.read_csv(self.file_name, header=None)
        # Every chunk, including the final partial one, uses the same separator.
        separator = ' ' if self.file_extension == '.txt' else ','

        for start in range(0, len(data), self.split):
            output_file = os.path.join(
                self.directory,
                f"split_file{self.file_number}{self.file_extension}",
            )
            # Slice whole chunks rather than appending row by row:
            # DataFrame.append no longer exists in pandas 2.x, and slicing
            # avoids rebuilding the frame for every row.
            chunk = data.iloc[start:start + self.split]
            chunk.to_csv(output_file, header=False, index=False, sep=separator)
            self.file_number += 1
51+
52+
if __name__ == '__main__':
    # Command-line entry point: the first argument is the file to split and
    # the second is the number of rows per output file.
    if len(sys.argv) < 3:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("Usage: python split_files.py <csv/text_file> <split/line_number>")
    input_file, split_number = sys.argv[1], sys.argv[2]
    sp = Split_Files(input_file, split_number)
    sp.split_data()

0 commit comments

Comments
 (0)