## Data Streams

### Reading Data interactively

In [1]:
#!/use/bin/env python3

name = input("Please enter your name: ")
print("Hello, " + name)

Please enter your name: New
Hello, New


In [2]:
def to_seconds(hours, minutes, seconds):
    return hours*3600+minutes*60+seconds

print("Welcome to this time converter")

cont = "y"
while(cont.lower() == "y"):
    hours = int(input("Enter the number of hours: "))
    minutes = int(input("Enter the number of minutes: "))
    seconds = int(input("Enter the number of seconds: "))
    
    print("That's {} seconds".format(to_seconds(hours, minutes, seconds)))
    print()
    cont = input("Do you want to do another conversion? [y to continue] ")
    
print("Good bye!")

Welcome to this time converter
Enter the number of hours: 5555
Enter the number of minutes: 222
Enter the number of seconds: 111
That's 20011431 seconds

Do you want to do another conversion? [y to continue] y
Enter the number of hours: 22
Enter the number of minutes: 44
Enter the number of seconds: 11
That's 81851 seconds

Do you want to do another conversion? [y to continue] b
Good bye!


### Standard Streams

In [3]:
data = input("This will come from STDIN: ")
print("Now we write it to STDOUT: " + data)
print("Now we generate an error to STDERR: " + data + 1)

This will come from STDIN: NewizZ
Now we write it to STDOUT: NewizZ


TypeError: must be str, not int

### Environment Variable

In [5]:
env

{'ALLUSERSPROFILE': 'C:\\ProgramData',
 'APPDATA': 'C:\\Users\\wityanat.CEMENTHAI\\AppData\\Roaming',
 'COMMONPROGRAMFILES': 'C:\\Program Files\\Common Files',
 'COMMONPROGRAMFILES(X86)': 'C:\\Program Files (x86)\\Common Files',
 'COMMONPROGRAMW6432': 'C:\\Program Files\\Common Files',
 'COMPUTERNAME': 'CHEM-NB-5L861G2',
 'COMSPEC': 'C:\\WINDOWS\\system32\\cmd.exe',
 'DEFLOGDIR': 'C:\\ProgramData\\McAfee\\Endpoint Security\\Logs',
 'DRIVERDATA': 'C:\\Windows\\System32\\Drivers\\DriverData',
 'FPS_BROWSER_APP_PROFILE_STRING': 'Internet Explorer',
 'FPS_BROWSER_USER_PROFILE_STRING': 'Default',
 'HOMEDRIVE': 'U:',
 'HOMEPATH': '\\',
 'HOMESHARE': '\\\\rocnasfs01\\home\\wityanat',
 'LOCALAPPDATA': 'C:\\Users\\wityanat.CEMENTHAI\\AppData\\Local',
 'LOGONSERVER': '\\\\CHEM03DC01',
 'NUMBER_OF_PROCESSORS': '8',
 'ONEDRIVE': 'C:\\Users\\wityanat.CEMENTHAI\\OneDrive',
 'ONEDRIVECONSUMER': 'C:\\Users\\wityanat.CEMENTHAI\\OneDrive',
 'OS': 'Windows_NT',
 'PATH': 'C:\\Users\\wityanat.CEMENTHAI\\An

In [11]:
env[PATH]

UsageError: Environment does not have key: [PATH]


In [12]:
echo $PATH

$PATH


In [13]:
import os
# use get method allow us to specify a default value when the key that we're looking for isn't in the dictionary
print("HOME: " + os.environ.get("HOME", ""))
print("SHELL: " + os.environ.get("SHELL", ""))
print("FRUIT: " + os.environ.get("FRUIT", ""))

HOME: 
SHELL: 
FRUIT: 


In [14]:
print("HOMEPATH: " + os.environ.get("HOMEPATH", ""))
print("CLICOLOR: " + os.environ.get("CLICOLOR", ""))
print("MPLBACKEND: " + os.environ.get("MPLBACKEND", ""))

HOMEPATH: \
CLICOLOR: 1
MPLBACKEND: module://ipykernel.pylab.backend_inline


In [15]:
export FRUIT=Pineapple

SyntaxError: invalid syntax (<ipython-input-15-6ff0871813fd>, line 1)

### Command-Line Arguments and Exit Status

In [16]:
#!usr/bin/env python3

import sys
print(sys.argv)

['C:\\Users\\wityanat.CEMENTHAI\\Anaconda3\\envs\\aiml\\lib\\site-packages\\ipykernel_launcher.py', '-f', 'C:\\Users\\wityanat.CEMENTHAI\\AppData\\Roaming\\jupyter\\runtime\\kernel-5a570693-054c-4542-96f5-078c49619210.json']


In [18]:
# wc variable.py

In [47]:
echo $? # Check last exit or return code value

$? # Check last exit or return code value


In [21]:
import os
import sys

filename = sys.argv[1]

if not os.path.exists(filename):
    with open(filename, "w") as f:
        f.write("New file created\n")
        
else:
    print("Error, the file {} already exists!".format(filename))
    sys.exit(1)

In [25]:
sys.argv[1]

'-f'

In [24]:
import os
import sys

filename = sys.argv[1]

if not os.path.exists(filename):
    with open(filename, "w") as f:
        f.write("New file created\n")
        
else:
    print("Error, the file {} already exists!".format(filename))
    sys.exit(1)

Error, the file -f already exists!


SystemExit: 1

## Python Subprocesses

### Running System Commands in Python

In [26]:
import subprocess
subprocess.run(["date"])

FileNotFoundError: [WinError 2] The system cannot find the file specified

In [28]:
subprocess.run(["sleep", "2"])

FileNotFoundError: [WinError 2] The system cannot find the file specified

In [29]:
result = subprocess.run(["ls", "this_file_does_not_exist"])

FileNotFoundError: [WinError 2] The system cannot find the file specified

In [31]:
print(result.returncode)

NameError: name 'result' is not defined

### Obtaining the Output of a System Command

In [32]:
result = subprocess.run(["host", "8.8.8.8"], capture_output=True)
print(result.returncode)

TypeError: __init__() got an unexpected keyword argument 'capture_output'

In [33]:
print(result.stdout)

NameError: name 'result' is not defined

In [40]:
stdout = b'8.8.8.8.in-addr.arpa domain name pointer dns.google.\n'
# b : array of bytes since python recieve output from os, python don't know how to decode them (which decode?UTF-8?)

In [41]:
print(stdout.decode().split())

['8.8.8.8.in-addr.arpa', 'domain', 'name', 'pointer', 'dns.google.']


In [44]:
result = subprocess.run(["rm", "does_not_exist"], capture_output=True)
print(result.returncode)
print(result.stdout)
print(result.stderr)

TypeError: __init__() got an unexpected keyword argument 'capture_output'

### Advanced Subprocess Management

In [48]:
import os
import subprocess

my_env = os.environ.copy()
my_env["PATH"] = os.pathsep.join(["/opt/myapp/", my_env["PATH"]])

result = subprocess.run(["myapp"], env=my_env)

FileNotFoundError: [WinError 2] The system cannot find the file specified

In [49]:
env

{'ALLUSERSPROFILE': 'C:\\ProgramData',
 'APPDATA': 'C:\\Users\\wityanat.CEMENTHAI\\AppData\\Roaming',
 'COMMONPROGRAMFILES': 'C:\\Program Files\\Common Files',
 'COMMONPROGRAMFILES(X86)': 'C:\\Program Files (x86)\\Common Files',
 'COMMONPROGRAMW6432': 'C:\\Program Files\\Common Files',
 'COMPUTERNAME': 'CHEM-NB-5L861G2',
 'COMSPEC': 'C:\\WINDOWS\\system32\\cmd.exe',
 'DEFLOGDIR': 'C:\\ProgramData\\McAfee\\Endpoint Security\\Logs',
 'DRIVERDATA': 'C:\\Windows\\System32\\Drivers\\DriverData',
 'FPS_BROWSER_APP_PROFILE_STRING': 'Internet Explorer',
 'FPS_BROWSER_USER_PROFILE_STRING': 'Default',
 'HOMEDRIVE': 'U:',
 'HOMEPATH': '\\',
 'HOMESHARE': '\\\\rocnasfs01\\home\\wityanat',
 'LOCALAPPDATA': 'C:\\Users\\wityanat.CEMENTHAI\\AppData\\Local',
 'LOGONSERVER': '\\\\CHEM03DC01',
 'NUMBER_OF_PROCESSORS': '8',
 'ONEDRIVE': 'C:\\Users\\wityanat.CEMENTHAI\\OneDrive',
 'ONEDRIVECONSUMER': 'C:\\Users\\wityanat.CEMENTHAI\\OneDrive',
 'OS': 'Windows_NT',
 'PATH': 'C:\\Users\\wityanat.CEMENTHAI\\An

In [50]:
my_env

{'ALLUSERSPROFILE': 'C:\\ProgramData',
 'APPDATA': 'C:\\Users\\wityanat.CEMENTHAI\\AppData\\Roaming',
 'COMMONPROGRAMFILES': 'C:\\Program Files\\Common Files',
 'COMMONPROGRAMFILES(X86)': 'C:\\Program Files (x86)\\Common Files',
 'COMMONPROGRAMW6432': 'C:\\Program Files\\Common Files',
 'COMPUTERNAME': 'CHEM-NB-5L861G2',
 'COMSPEC': 'C:\\WINDOWS\\system32\\cmd.exe',
 'DEFLOGDIR': 'C:\\ProgramData\\McAfee\\Endpoint Security\\Logs',
 'DRIVERDATA': 'C:\\Windows\\System32\\Drivers\\DriverData',
 'FPS_BROWSER_APP_PROFILE_STRING': 'Internet Explorer',
 'FPS_BROWSER_USER_PROFILE_STRING': 'Default',
 'HOMEDRIVE': 'U:',
 'HOMEPATH': '\\',
 'HOMESHARE': '\\\\rocnasfs01\\home\\wityanat',
 'LOCALAPPDATA': 'C:\\Users\\wityanat.CEMENTHAI\\AppData\\Local',
 'LOGONSERVER': '\\\\CHEM03DC01',
 'NUMBER_OF_PROCESSORS': '8',
 'ONEDRIVE': 'C:\\Users\\wityanat.CEMENTHAI\\OneDrive',
 'ONEDRIVECONSUMER': 'C:\\Users\\wityanat.CEMENTHAI\\OneDrive',
 'OS': 'Windows_NT',
 'PATH': '/opt/myapp/;C:\\Users\\wityanat.C

## Processing Log Files

### Filtering Log Files with Regular Expressions

In [53]:
import sys

logfile = sys.argv[1]
with open(logfile) as f:
    for line in f:
        print(line.strip())

New file created


In [55]:
import sys

# logfile = sys.argv[1]
logfile = "syslog"
with open(logfile) as f:
    for line in f:
        if "CRON" not in line:
            continue
        print(line.strip())

Jul 6 14:01:23 computer.name CRON[29440]: USER (good_user)
Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)
Jul 6 14:04:01 computer.name CRON[29440]: USER (naughty_user)
Jul 6 14:05:01 computer.name CRON[29440]: USER (naughty_user)


In [57]:
import re
pattern = r"USER \((\w+)\)$"
line = "Jul 6 14:04:01 computer.name CRON[29440]: USER (naughty_user)"
result = re.search(pattern, line)

In [59]:
print(result[1])

naughty_user


In [60]:
import sys
import re
# logfile = sys.argv[1]
logfile = "syslog"
with open(logfile) as f:
    for line in f:
        if "CRON" not in line:
            continue
        pattern = r"USER \((\w+)\)$"
        result = re.search(pattern, line)
        print(result[1])

good_user
naughty_user
naughty_user
naughty_user


In [99]:
import re
def show_time_of_pid(line):
  pattern = r"(\w+ \d+ \d+:\d+:\d+)[\w .=]+\[(\d+)\]"
  result = re.search(pattern, line)
#   print(result)
#   print(result[1], result[2])
  return "{} pid:{}".format(result[1], result[2])

print(show_time_of_pid("Jul 6 14:01:23 computer.name CRON[29440]: USER (good_user)")) # Jul 6 14:01:23 pid:29440

print(show_time_of_pid("Jul 6 14:02:08 computer.name jam_tag=psim[29187]: (UUID:006)")) # Jul 6 14:02:08 pid:29187

print(show_time_of_pid("Jul 6 14:02:09 computer.name jam_tag=psim[29187]: (UUID:007)")) # Jul 6 14:02:09 pid:29187

print(show_time_of_pid("Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)")) # Jul 6 14:03:01 pid:29440

print(show_time_of_pid("Jul 6 14:03:40 computer.name cacheclient[29807]: start syncing from \"0xDEADBEEF\"")) # Jul 6 14:03:40 pid:29807

print(show_time_of_pid("Jul 6 14:04:01 computer.name CRON[29440]: USER (naughty_user)")) # Jul 6 14:04:01 pid:29440

print(show_time_of_pid("Jul 6 14:05:01 computer.name CRON[29440]: USER (naughty_user)")) # Jul 6 14:05:01 pid:29440

Jul 6 14:01:23 pid:29440
Jul 6 14:02:08 pid:29187
Jul 6 14:02:09 pid:29187
Jul 6 14:03:01 pid:29440
Jul 6 14:03:40 pid:29807
Jul 6 14:04:01 pid:29440
Jul 6 14:05:01 pid:29440


### Making Sense out of the Data

In [1]:
usernames = {}
name = "good_user"
usernames[name] = usernames.get(name, 0) + 1
print(usernames)

{'good_user': 1}


In [2]:
usernames[name] = usernames.get(name, 0) + 1
print(usernames)

{'good_user': 2}


In [3]:
import sys
import re
# logfile = sys.argv[1]
logfile = "syslog"
usernames = {}
with open(logfile) as f:
    for line in f:
        if "CRON" not in line:
            continue
        pattern = r"USER \((\w+)\)$"
        result = re.search(pattern, line)
        if result is None:
            continue
        name = result[1]
        usernames[name] = usernames.get(name, 0) + 1

print(usernames)

{'good_user': 1, 'naughty_user': 3}


### Working with Log Files

In [6]:
# #!/usr/bin/env python3
# import sys
# import os
# import re


# def error_search(log_file):
#     error = input("What is the error? ")
#     returned_errors = []
#     with open(log_file, mode='r',encoding='UTF-8') as file:
#         for log in  file.readlines():
#             error_patterns = ["error"]
#         for i in range(len(error.split(' '))):
#             error_patterns.append(r"{}".format(error.split(' ')[i].lower()))
#         if all(re.search(error_pattern, log.lower()) for error_pattern in error_patterns):
#             returned_errors.append(log)
#         file.close()
#     return returned_errors

  
# def file_output(returned_errors):
# #   with open(os.path.expanduser('~') + '/data/errors_found.log', 'w') as file:
#     with open('errors_found.log', 'w') as file:
#         for error in returned_errors:
#             file.write(error)
#         file.close()
# if __name__ == "__main__":
# #   log_file = sys.argv[1]
#     log_file = "fishy.log"
#     returned_errors = error_search(log_file)
#     file_output(returned_errors)
#     sys.exit(0)

What is the error? CRON ERROR Failed to start


SystemExit: 0

In [7]:
%tb

SystemExit: 0

In [44]:
#!/usr/bin/env python3

import sys
import os
import re

def error_search(log_file):
    error = input("What is the error? ")
    returned_errors = []
    with open(log_file, mode='r',encoding='UTF-8') as file:
        for log in file.readlines():
            error_patterns = ["error"]
            for i in range(len(error.split(' '))):
                print(error_patterns)
                error_patterns.append(r"{}".format(error.split(' ')[i].lower()))
            if all(re.search(error_pattern, log.lower()) for error_pattern in error_patterns):
                print(log)
                returned_errors.append(log)
        file.close()
        print(returned_errors)
    return returned_errors

In [45]:
def file_output(returned_errors):
#     with open(os.path.expanduser('~') + '\data\errors_found.log', 'w') as file:
    with open('errors_found.log', 'w') as file:
        for error in returned_errors:
            file.write(error)
        file.close()

In [46]:
# Insert "CRON ERROR Failed to start"
if __name__ == "__main__":
#     log_file = sys.argv[1]
    log_file = "fishy.log"
    returned_errors = error_search(log_file)
    file_output(returned_errors)
#     sys.exit(0)

What is the error? CRON ERROR Failed to start
['error']
['error', 'cron']
['error', 'cron', 'error']
['error', 'cron', 'error', 'failed']
['error', 'cron', 'error', 'failed', 'to']
['error']
['error', 'cron']
['error', 'cron', 'error']
['error', 'cron', 'error', 'failed']
['error', 'cron', 'error', 'failed', 'to']
['error']
['error', 'cron']
['error', 'cron', 'error']
['error', 'cron', 'error', 'failed']
['error', 'cron', 'error', 'failed', 'to']
['error']
['error', 'cron']
['error', 'cron', 'error']
['error', 'cron', 'error', 'failed']
['error', 'cron', 'error', 'failed', 'to']
['error']
['error', 'cron']
['error', 'cron', 'error']
['error', 'cron', 'error', 'failed']
['error', 'cron', 'error', 'failed', 'to']
['error']
['error', 'cron']
['error', 'cron', 'error']
['error', 'cron', 'error', 'failed']
['error', 'cron', 'error', 'failed', 'to']
['error']
['error', 'cron']
['error', 'cron', 'error']
['error', 'cron', 'error', 'failed']
['error', 'cron', 'error', 'failed', 'to']
['error']