## Input/Output

In [1]:
# print() sends its text to standard output (stdout), followed by a newline
print('Hello!')

Hello!


In [2]:
import sys

# write() does not add a newline on its own, so it is given explicitly.
# The cell's result (13) is the value write() returns: the number of characters written.
sys.stdout.write('Hello again!\n')

Hello again!


13

In [3]:
# stderr is a separate stream from stdout; write() again returns the character count (8)
sys.stderr.write('Danger!\n')

Danger!


8

In [4]:
# print() can target any stream through file= — here stderr instead of the default stdout
print('Alarm!', file=sys.stderr)

Alarm!


## Files

In [5]:
# Manual open / read / close, without a context manager.
# try/finally guarantees the handle is closed even if read() raises;
# `file` stays bound afterwards, but refers to a closed file object.
file = open('Sentence.txt')
try:
    content = file.read()
finally:
    file.close()

In [6]:
# the text was read while the file was open, so it is still available after close()
print(content)

All models are wrong, but some are useful


In [7]:
# the handle was closed in an earlier cell, so any further read raises ValueError
file.read()

ValueError: I/O operation on closed file.

In [8]:
# A context manager is preferable to a manual open()/close() pair:
# the file is closed automatically when the `with` block is left.
with open("nile.txt") as f:
    numbers = f.read()
print(numbers)

1120 1160 963 1210 1160 1160 813 1230 1370 1140 995 935 1110 994 1020 960 1180 799 958 1140 1100 1210 1150 1250 1260 1220 1030 1100 774 840 874 694 940 833 701 916 692 1020 1050 969 831 726 456 824 702 1120 1100 832 764 821 768 845 864 862 698 845 744 796 1040 759 781 865 845 944 984 897 822 1010 771 676 649 846 812 742 801 1040 860 874 848 890 744 749 838 1050 918 986 797 923 975 815 1020 906 901 1170 912 746 919 718 714 740



**Exercise**. Write all numbers from `nile.txt` into a new file named `nile2.txt`, each number on a new line.

In [9]:
# Solution: split on whitespace to get the individual numbers, then join
# them with newlines so that each number lands on its own line.
one_per_line = '\n'.join(numbers.split())
with open("nile2.txt", mode='w') as f:
    f.write(one_per_line)

In [11]:
# Iterating over a file object yields one line at a time. Lines keep their
# trailing "\n", so end="" stops print() from adding a second newline.
with open("nile2.txt") as f:
    for line in f:
        print(line, end="")

1120
1160
963
1210
1160
1160
813
1230
1370
1140
995
935
1110
994
1020
960
1180
799
958
1140
1100
1210
1150
1250
1260
1220
1030
1100
774
840
874
694
940
833
701
916
692
1020
1050
969
831
726
456
824
702
1120
1100
832
764
821
768
845
864
862
698
845
744
796
1040
759
781
865
845
944
984
897
822
1010
771
676
649
846
812
742
801
1040
860
874
848
890
744
749
838
1050
918
986
797
923
975
815
1020
906
901
1170
912
746
919
718
714
740

### Appending to a file

What if we want to append the author's name, *George E. P. Box*, to `Sentence.txt`?

In [12]:
# Mode "a" appends to the end of the file instead of truncating it; the
# leading "\n" puts the name on its own line. Re-running this cell appends
# the line again.
with open("Sentence.txt", "a") as f:
    f.write("\nGeorge E. P. Box")

In [13]:
# re-read the whole file to confirm that the new line was appended
with open("Sentence.txt") as f:
    print(f.read())

All models are wrong, but some are useful
George E. P. Box


`open()` can read more than just `*.txt` files — for example, a CSV file:

In [15]:
# readlines() loads the whole file into a list with one string per line
with open("titanic.csv") as f:
    lines = f.readlines()
    
# number of lines in the file, and confirmation that `lines` is a plain list
print(len(lines), type(lines))

892 <class 'list'>


In [16]:
# first line is the CSV header: the column names, still with a trailing comma and "\n"
lines[0]

'survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,\n'

In [17]:
# first data row as one raw comma-separated string — nothing has been parsed into fields or types
lines[1]

'0,3,male,22,1,0,7.25,S,Third,man,TRUE,,Southampton,no,FALSE,\n'

However, the recommended way to read `*.csv` files is with pandas, which parses them straight into a `DataFrame`:

In [18]:
import pandas as pd
# read_csv() parses the header and the values into a typed DataFrame.
# Every raw line of this file ends with a trailing comma, which is presumably
# why an extra, empty 'Unnamed: 15' column appears in the result.
df = pd.read_csv('titanic.csv')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,Unnamed: 15
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True,


In [19]:
# per-column non-null counts and dtypes: reveals the missing values
# (age, embarked, deck, embark_town) and that 'Unnamed: 15' is entirely empty
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 16 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   survived     891 non-null    int64  
 1   pclass       891 non-null    int64  
 2   sex          891 non-null    object 
 3   age          714 non-null    float64
 4   sibsp        891 non-null    int64  
 5   parch        891 non-null    int64  
 6   fare         891 non-null    float64
 7   embarked     889 non-null    object 
 8   class        891 non-null    object 
 9   who          891 non-null    object 
 10  adult_male   891 non-null    bool   
 11  deck         203 non-null    object 
 12  embark_town  889 non-null    object 
 13  alive        891 non-null    object 
 14  alone        891 non-null    bool   
 15  Unnamed: 15  0 non-null      float64
dtypes: bool(2), float64(3), int64(4), object(7)
memory usage: 99.3+ KB


Editing a `pd.DataFrame` and writing it back to a file

In [20]:
# drop() returns a new DataFrame without the listed columns; `df` itself is not modified
columns_to_drop = ['Unnamed: 15', 'who', 'adult_male', 'alone']
df_dropped = df.drop(columns=columns_to_drop)
df_dropped.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,deck,embark_town,alive
0,0,3,male,22.0,1,0,7.25,S,Third,,Southampton,no
1,1,1,female,38.0,1,0,71.2833,C,First,C,Cherbourg,yes
2,1,3,female,26.0,0,0,7.925,S,Third,,Southampton,yes
3,1,1,female,35.0,1,0,53.1,S,First,C,Southampton,yes
4,0,3,male,35.0,0,0,8.05,S,Third,,Southampton,no


In [25]:
# index=False keeps the row index out of the file, so only the data columns are written
df_dropped.to_csv("titanic_dropped.csv", index=False)

In [26]:
# read the file back to check the round trip
pd.read_csv("titanic_dropped.csv")

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,deck,embark_town,alive
0,0,3,male,22.0,1,0,7.2500,S,Third,,Southampton,no
1,1,1,female,38.0,1,0,71.2833,C,First,C,Cherbourg,yes
2,1,3,female,26.0,0,0,7.9250,S,Third,,Southampton,yes
3,1,1,female,35.0,1,0,53.1000,S,First,C,Southampton,yes
4,0,3,male,35.0,0,0,8.0500,S,Third,,Southampton,no
...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,,Southampton,no
887,1,1,female,19.0,0,0,30.0000,S,First,B,Southampton,yes
888,0,3,female,,1,2,23.4500,S,Third,,Southampton,no
889,1,1,male,26.0,0,0,30.0000,C,First,C,Cherbourg,yes


In [27]:
# pandas can also load JSON directly into a DataFrame
df_json = pd.read_json('example.json')
df_json

Unnamed: 0,Product,Price
0,Desktop Computer,700
1,Tablet,250
2,iPhone,800
3,Laptop,1200


In [28]:
# Append a single row: build it as a one-row frame, then concatenate.
# ignore_index=True renumbers the rows 0..n-1. Because df_json is rebound to
# the result, re-running this cell appends the same row again.
new_row = pd.DataFrame({"Product": "Supercomputer", "Price": 100500}, index=[0])
df_json = pd.concat((df_json, new_row), ignore_index=True)

In [29]:
# the new row now sits at the end, under index 4
df_json

Unnamed: 0,Product,Price
0,Desktop Computer,700
1,Tablet,250
2,iPhone,800
3,Laptop,1200
4,Supercomputer,100500


In [30]:
# write the extended frame to a new file, leaving the original example.json untouched
df_json.to_json("example2.json")

In [31]:
import json

# The standard-library json module parses the same file into plain Python
# objects — here a dict of dicts keyed by column name, then by row label.
with open("example.json", encoding="UTF-8") as f:
    records = json.load(f)
records

{'Product': {'0': 'Desktop Computer',
  '1': 'Tablet',
  '2': 'iPhone',
  '3': 'Laptop'},
 'Price': {'0': 700, '1': 250, '2': 800, '3': 1200}}

In [32]:
# one "column" of the parsed JSON: row labels (as strings) mapped to product names
records["Product"]

{'0': 'Desktop Computer', '1': 'Tablet', '2': 'iPhone', '3': 'Laptop'}

In [34]:
# Add the new entry to both columns under the same row label; the label is
# the string '4' to match the existing string keys '0'..'3'.
records["Product"]['4'] = 'Supercomputer'
records["Price"]['4'] = 100500
# ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX escapes;
# indent=2 pretty-prints the output.
with open("example3.json", "w", encoding="UTF-8") as file_out:
    json.dump(records, file_out, ensure_ascii=False, indent=2)

In [None]:
# pandas can read XML into a DataFrame as well.
# NOTE(review): this cell has no execution count and no output, so it has not been run.
df_xml = pd.read_xml("statisticians.xml")
df_xml

In [35]:
# Transliteration table: one "<Cyrillic letter> — <Latin equivalent>" pair per line.
trans = """
        А — A
    Б — B
    В — V
    Г — G
    Д — D
    Е — E
    Ё — E
    Ж — ZH
    З — Z
    И — I
    Й — I
    К — K
    Л — L
    М — M
    Н — N
    О — O
    П — P
    Р — R
    С — S
    Т — T
    У — U
    Ф — F
    Х — KH
    Ц — TC
    Ч — CH
    Ш — SH
    Щ — SHCH
    Ы — Y
    Э — E
    Ю — IU
    Я — IA
"""

# Parse the table into a dict and print every pair as a dict-literal entry.
# Each row is split on its first em dash; blank rows and rows without an
# em dash are skipped instead of raising IndexError.
translit = {}
for item in trans.strip().splitlines():
    cyrillic, dash, latin = (part.strip() for part in item.partition('—'))
    if not dash:
        continue
    translit[cyrillic] = latin
    print(f"'{cyrillic}': '{latin}',")

'А': 'A',
'Б': 'B',
'В': 'V',
'Г': 'G',
'Д': 'D',
'Е': 'E',
'Ё': 'E',
'Ж': 'ZH',
'З': 'Z',
'И': 'I',
'Й': 'I',
'К': 'K',
'Л': 'L',
'М': 'M',
'Н': 'N',
'О': 'O',
'П': 'P',
'Р': 'R',
'С': 'S',
'Т': 'T',
'У': 'U',
'Ф': 'F',
'Х': 'KH',
'Ц': 'TC',
'Ч': 'CH',
'Ш': 'SH',
'Щ': 'SHCH',
'Ы': 'Y',
'Э': 'E',
'Ю': 'IU',
'Я': 'IA',


In [36]:
# two overlapping sets (3 and 4 are in both) for the set-operation examples
A = {1, 2, 3, 4}
B = {3, 4, 5, 6, 7}

In [37]:
# union: elements that are in A, in B, or in both (operator form: A | B)
A.union(B)

{1, 2, 3, 4, 5, 6, 7}

In [40]:
# intersection: elements that are in both A and B (operator form: A & B)
A.intersection(B)

{3, 4}

In [42]:
# difference is not symmetric: A - B keeps the elements only in A, B - A those only in B
A - B, B - A

({1, 2}, {5, 6, 7})

In [43]:
# symmetric difference: elements that are in exactly one of the two sets (operator form: A ^ B)
A.symmetric_difference(B)

{1, 2, 5, 6, 7}